From f0f7a8811a234b2f33339bdd879786e5b460f1b3 Mon Sep 17 00:00:00 2001 From: Aiee <18348405+Aiee@users.noreply.github.com> Date: Wed, 15 Sep 2021 21:45:43 +0800 Subject: [PATCH 1/8] Move PR from nebula-graph --- src/graph/optimizer/OptimizerUtils.cpp | 33 +++++- src/graph/optimizer/OptimizerUtils.h | 4 + .../rule/OptimizeTagIndexScanByFilterRule.cpp | 28 ++++- .../rule/UnionAllIndexScanBaseRule.cpp | 111 +++++++++++++++--- src/graph/util/ExpressionUtils.cpp | 54 +++++++++ src/graph/util/ExpressionUtils.h | 12 ++ src/graph/validator/LookupValidator.cpp | 58 ++++----- src/graph/validator/LookupValidator.h | 9 +- src/graph/visitor/FoldConstantExprVisitor.cpp | 5 + src/graph/visitor/VidExtractVisitor.cpp | 24 ++-- .../test/FoldConstantExprVisitorTest.cpp | 6 + .../features/lookup/EdgeIndexFullScan.feature | 99 ++++++++++++++-- .../features/lookup/TagIndexFullScan.feature | 79 +++++++++++-- 13 files changed, 441 insertions(+), 81 deletions(-) diff --git a/src/graph/optimizer/OptimizerUtils.cpp b/src/graph/optimizer/OptimizerUtils.cpp index 53f7e89c240..87003ed97de 100644 --- a/src/graph/optimizer/OptimizerUtils.cpp +++ b/src/graph/optimizer/OptimizerUtils.cpp @@ -642,7 +642,8 @@ StatusOr selectRelExprIndex(const ColumnDef& field, } auto right = expr->right(); - DCHECK(right->kind() == Expression::Kind::kConstant); + expr->kind() == Expression::Kind::kRelIn ? DCHECK(right->isContainerExpr()) + : DCHECK(right->kind() == Expression::Kind::kConstant); const auto& value = static_cast(right)->value(); ScoredColumnHint hint; @@ -664,6 +665,10 @@ StatusOr selectRelExprIndex(const ColumnDef& field, hint.score = IndexScore::kNotEqual; break; } + case Expression::Kind::kRelIn: { + // check the property has an index + break; + } default: { return Status::Error("Invalid expression kind"); } @@ -912,6 +917,32 @@ bool OptimizerUtils::findOptimalIndex(const Expression* condition, return true; } +// Check if the relational expression has a valid index +// The left operand should either be a kEdgeProperty or kTagProperty expr +bool OptimizerUtils::relExprHasIndex( + const Expression* expr, + const std::vector>& indexItems) { + DCHECK(expr->isRelExpr()); + + for (auto& index : indexItems) { + const auto& fields = index->get_fields(); + if (fields.empty()) { + return false; + } + + auto left = static_cast(expr)->left(); + DCHECK(left->kind() == Expression::Kind::kEdgeProperty || + left->kind() == Expression::Kind::kTagProperty); + + auto propExpr = static_cast(left); + if (propExpr->prop() == fields[0].get_name()) { + return true; + } + } + + return false; +} + void OptimizerUtils::copyIndexScanData(const nebula::graph::IndexScan* from, nebula::graph::IndexScan* to) { to->setEmptyResultSet(from->isEmptyResultSet()); diff --git a/src/graph/optimizer/OptimizerUtils.h b/src/graph/optimizer/OptimizerUtils.h index 02d8efe1bee..4ae562b6e0e 100644 --- a/src/graph/optimizer/OptimizerUtils.h +++ b/src/graph/optimizer/OptimizerUtils.h @@ -95,6 +95,10 @@ class OptimizerUtils { bool* isPrefixScan, nebula::storage::cpp2::IndexQueryContext* ictx); + static bool relExprHasIndex( + const Expression* expr, + const std::vector>& indexItems); + static void copyIndexScanData(const nebula::graph::IndexScan* from, nebula::graph::IndexScan* to); }; diff --git a/src/graph/optimizer/rule/OptimizeTagIndexScanByFilterRule.cpp b/src/graph/optimizer/rule/OptimizeTagIndexScanByFilterRule.cpp index 58ac2c7047c..ecdec624926 100644 --- a/src/graph/optimizer/rule/OptimizeTagIndexScanByFilterRule.cpp +++ b/src/graph/optimizer/rule/OptimizeTagIndexScanByFilterRule.cpp @@ -46,6 +46,9 @@ const Pattern& OptimizeTagIndexScanByFilterRule::pattern() const { return pattern; } +// Match 2 kinds of expressions: +// 1. Relational expr +// 2. Logical AND expr bool OptimizeTagIndexScanByFilterRule::match(OptContext* ctx, const MatchedResult& matched) const { if (!OptRule::match(ctx, matched)) { return false; @@ -60,11 +63,18 @@ bool OptimizeTagIndexScanByFilterRule::match(OptContext* ctx, const MatchedResul auto condition = filter->condition(); if (condition->isRelExpr()) { auto relExpr = static_cast(condition); + // If the container in the IN expr has only 1 element, it will be converted to an relEQ + // expr. If more than 1 element found in the container, UnionAllIndexScanBaseRule will be + // applied. + if (relExpr->kind() == ExprKind::kRelIn && relExpr->right()->isContainerExpr()) { + auto ContainerOperands = graph::ExpressionUtils::getContainerExprOperands(relExpr->right()); + return ContainerOperands.size() == 1; + } return relExpr->left()->kind() == ExprKind::kTagProperty && relExpr->right()->kind() == ExprKind::kConstant; } if (condition->isLogicalExpr()) { - return condition->kind() == Expression::Kind::kLogicalAnd; + return condition->kind() == ExprKind::kLogicalAnd; } return false; @@ -94,9 +104,23 @@ StatusOr OptimizeTagIndexScanByFilterRule::transform( OptimizerUtils::eraseInvalidIndexItems(scan->schemaId(), &indexItems); + auto condition = filter->condition(); + auto conditionType = condition->kind(); + Expression* transformedExpr = condition->clone(); + + // Stand alone IN expr + if (conditionType == ExprKind::kRelIn) { + if (!OptimizerUtils::relExprHasIndex(condition, indexItems)) { + return TransformResult::noTransform(); + } + transformedExpr = graph::ExpressionUtils::rewriteInExpr(condition); + } + + DCHECK(transformedExpr->kind() == ExprKind::kLogicalOr || + transformedExpr->kind() == ExprKind::kRelEQ); IndexQueryContext ictx; bool isPrefixScan = false; - if (!OptimizerUtils::findOptimalIndex(filter->condition(), indexItems, &isPrefixScan, &ictx)) { + if (!OptimizerUtils::findOptimalIndex(transformedExpr, indexItems, &isPrefixScan, &ictx)) { return TransformResult::noTransform(); } diff --git a/src/graph/optimizer/rule/UnionAllIndexScanBaseRule.cpp b/src/graph/optimizer/rule/UnionAllIndexScanBaseRule.cpp index 16a5003cf43..e2f223861c6 100644 --- a/src/graph/optimizer/rule/UnionAllIndexScanBaseRule.cpp +++ b/src/graph/optimizer/rule/UnionAllIndexScanBaseRule.cpp @@ -15,6 +15,7 @@ #include "graph/planner/plan/PlanNode.h" #include "graph/planner/plan/Query.h" #include "graph/planner/plan/Scan.h" +#include "graph/util/ExpressionUtils.h" #include "interface/gen-cpp2/storage_types.h" using nebula::graph::Filter; @@ -24,11 +25,18 @@ using nebula::graph::TagIndexFullScan; using nebula::storage::cpp2::IndexQueryContext; using Kind = nebula::graph::PlanNode::Kind; +using ExprKind = nebula::Expression::Kind; using TransformResult = nebula::opt::OptRule::TransformResult; namespace nebula { namespace opt { +// The matched expression should be either a OR expression or an expression that could be +// rewrote to a OR expression. There are 3 senarios. +// 1. OR expr. If OR expr has IN expr operand that has a valid index, expand it to OR expr. +// 2. AND expr such as A in [a, b] AND B, because it can be transformed to (A==a AND B) OR (A==b +// AND B) +// 3. IN expr such as A in [a, b] since it can be transformed to (A==a) OR (A==b) bool UnionAllIndexScanBaseRule::match(OptContext* ctx, const MatchedResult& matched) const { if (!OptRule::match(ctx, matched)) { return false; @@ -36,13 +44,32 @@ bool UnionAllIndexScanBaseRule::match(OptContext* ctx, const MatchedResult& matc auto filter = static_cast(matched.planNode()); auto scan = static_cast(matched.planNode({0, 0})); auto condition = filter->condition(); - if (!condition->isLogicalExpr() || condition->kind() != Expression::Kind::kLogicalOr) { - return false; + auto conditionType = condition->kind(); + + if (condition->isLogicalExpr()) { + if (conditionType == ExprKind::kLogicalOr) { + return true; + } + if (conditionType == ExprKind::kLogicalAnd && + graph::ExpressionUtils::findAny(static_cast(condition), + {ExprKind::kRelIn})) { + return true; + } + // Check logical operands + for (auto operand : static_cast(condition)->operands()) { + if (!operand->isRelExpr() || !operand->isLogicalExpr()) { + return false; + } + } } - for (auto operand : static_cast(condition)->operands()) { - if (!operand->isRelExpr()) { - return false; + // If the number of elements is less or equal than 1, the IN expr will be transformed into a + // relEQ expr by the OptimizeTagIndexScanByFilterRule. + if (condition->isRelExpr()) { + auto relExpr = static_cast(condition); + if (relExpr->kind() == ExprKind::kRelIn && relExpr->right()->isContainerExpr()) { + auto operandsVec = graph::ExpressionUtils::getContainerExprOperands(relExpr->right()); + return operandsVec.size() > 1; } } @@ -52,9 +79,10 @@ bool UnionAllIndexScanBaseRule::match(OptContext* ctx, const MatchedResult& matc } } - return true; + return false; } +// If the IN expr has only 1 element in its container, it will be converted to an relEQ expr StatusOr UnionAllIndexScanBaseRule::transform(OptContext* ctx, const MatchedResult& matched) const { auto filter = static_cast(matched.planNode()); @@ -62,20 +90,75 @@ StatusOr UnionAllIndexScanBaseRule::transform(OptContext* ctx, auto scan = static_cast(node); auto metaClient = ctx->qctx()->getMetaClient(); - StatusOr>> status; - if (node->kind() == graph::PlanNode::Kind::kTagIndexFullScan) { - status = metaClient->getTagIndexesFromCache(scan->space()); - } else { - status = metaClient->getEdgeIndexesFromCache(scan->space()); - } + auto status = node->kind() == graph::PlanNode::Kind::kTagIndexFullScan + ? metaClient->getTagIndexesFromCache(scan->space()) + : metaClient->getEdgeIndexesFromCache(scan->space()); + NG_RETURN_IF_ERROR(status); auto indexItems = std::move(status).value(); OptimizerUtils::eraseInvalidIndexItems(scan->schemaId(), &indexItems); + // Check whether the prop has index. + // Rewrite if the property in the IN expr has a valid index + if (indexItems.empty()) { + return TransformResult::noTransform(); + } + + auto condition = filter->condition(); + auto conditionType = condition->kind(); + Expression* transformedExpr = condition->clone(); + + // Stand alone IN expr + if (conditionType == ExprKind::kRelIn) { + if (!OptimizerUtils::relExprHasIndex(condition, indexItems)) { + return TransformResult::noTransform(); + } + transformedExpr = graph::ExpressionUtils::rewriteInExpr(condition); + } + + // AND expr containing IN expr operand + if (conditionType == ExprKind::kLogicalAnd) { + auto relInExprs = graph::ExpressionUtils::collectAll(transformedExpr, {ExprKind::kRelIn}); + DCHECK(!relInExprs.empty()); + bool indexFound = false; + // Iterate all operands and expand IN exprs if possible + for (auto& expr : static_cast(transformedExpr)->operands()) { + if (expr->kind() == ExprKind::kRelIn) { + if (OptimizerUtils::relExprHasIndex(transformedExpr, indexItems)) { + expr = graph::ExpressionUtils::rewriteInExpr(expr); + } + } + } + if (!indexFound) { + return TransformResult::noTransform(); + } + + // Reconstruct AND expr using distributive law + } + + // OR expr + if (conditionType == ExprKind::kLogicalOr) { + auto relInExprs = graph::ExpressionUtils::collectAll(transformedExpr, {ExprKind::kRelIn}); + if (!relInExprs.empty()) { + // Iterate all operands and expand IN exprs if possible + for (auto& expr : static_cast(transformedExpr)->operands()) { + if (expr->kind() == ExprKind::kRelIn) { + if (OptimizerUtils::relExprHasIndex(expr, indexItems)) { + expr = graph::ExpressionUtils::rewriteInExpr(expr); + } + } + } + // Flatten OR exprs + graph::ExpressionUtils::pullOrs(transformedExpr); + } + } + + DCHECK(transformedExpr->kind() == ExprKind::kLogicalOr || + transformedExpr->kind() == ExprKind::kRelEQ); std::vector idxCtxs; - auto condition = static_cast(filter->condition()); - for (auto operand : condition->operands()) { + auto logicalExpr = static_cast(transformedExpr); + for (auto operand : logicalExpr->operands()) { IndexQueryContext ictx; bool isPrefixScan = false; if (!OptimizerUtils::findOptimalIndex(operand, indexItems, &isPrefixScan, &ictx)) { diff --git a/src/graph/util/ExpressionUtils.cpp b/src/graph/util/ExpressionUtils.cpp index a4150564b06..c0ee3605ac0 100644 --- a/src/graph/util/ExpressionUtils.cpp +++ b/src/graph/util/ExpressionUtils.cpp @@ -170,6 +170,60 @@ Expression *ExpressionUtils::rewriteAgg2VarProp(const Expression *expr) { return RewriteVisitor::transform(expr, std::move(matcher), std::move(rewriter)); } +// Rewrite the IN expr to a relEQ expr if the right operand has only 1 element. +// Rewrite the IN expr to a OR expr if the right operand has more than 1 element. +Expression *ExpressionUtils::rewriteInExpr(const Expression *expr) { + DCHECK(expr->kind() == Expression::Kind::kRelIn); + auto pool = expr->getObjPool(); + auto inExpr = static_cast(expr->clone()); + auto containerOperands = getContainerExprOperands(inExpr->right()); + + auto operandSize = containerOperands.size(); + // container has only 1 element, no need to transform to logical expression + if (operandSize == 1) { + return RelationalExpression::makeEQ(pool, inExpr->left(), containerOperands[0]); + } + + std::vector orExprOperands; + orExprOperands.reserve(operandSize); + // A in [B, C, D] => (A == B) or (A == C) or (A == D) + for (auto *operand : containerOperands) { + orExprOperands.emplace_back(RelationalExpression::makeEQ(pool, inExpr->left(), operand)); + } + auto orExpr = LogicalExpression::makeOr(pool); + orExpr->setOperands(orExprOperands); + + return orExpr; +} + +std::vector ExpressionUtils::getContainerExprOperands(const Expression *expr) { + DCHECK(expr->isContainerExpr()); + auto pool = expr->getObjPool(); + auto containerExpr = expr->clone(); + + std::vector containerOperands; + switch (containerExpr->kind()) { + case Expression::Kind::kList: + containerOperands = static_cast(containerExpr)->get(); + break; + case Expression::Kind::kSet: { + containerOperands = static_cast(containerExpr)->get(); + break; + } + case Expression::Kind::kMap: { + auto mapItems = static_cast(containerExpr)->get(); + // iterate map and add key into containerOperands + for (auto &item : mapItems) { + containerOperands.emplace_back(ConstantExpression::make(pool, std::move(item.first))); + } + break; + } + default: + LOG(FATAL) << "Invalid expression type " << containerExpr->kind(); + } + return containerOperands; +} + StatusOr ExpressionUtils::foldConstantExpr(const Expression *expr) { ObjectPool *objPool = expr->getObjPool(); auto newExpr = expr->clone(); diff --git a/src/graph/util/ExpressionUtils.h b/src/graph/util/ExpressionUtils.h index ef62ecfccba..a921da4fe52 100644 --- a/src/graph/util/ExpressionUtils.h +++ b/src/graph/util/ExpressionUtils.h @@ -66,6 +66,14 @@ class ExpressionUtils { static Expression* rewriteRelExpr(const Expression* expr); static Expression* rewriteRelExprHelper(const Expression* expr, Expression*& relRightOperandExpr); + // Rewrite IN expression into OR expression or relEQ expression + static Expression* rewriteInExpr(const Expression* expr); + + // Return the operands of container expressions + // For list and set, return the operands + // For map, return the keys + static std::vector getContainerExprOperands(const Expression* expr); + // Clone and fold constant expression static StatusOr foldConstantExpr(const Expression* expr); @@ -73,6 +81,10 @@ class ExpressionUtils { static Expression* reduceUnaryNotExpr(const Expression* expr); // Transform filter using multiple expression rewrite strategies + // 1. rewrite relational expressions containing arithmetic operands so that + // all constants are on the right side of relExpr. + // 2. fold constant + // 3. reduce unary expression e.g. !(A and B) => !A or !B static StatusOr filterTransform(const Expression* expr); // Negate the given logical expr: (A && B) -> (!A || !B) diff --git a/src/graph/validator/LookupValidator.cpp b/src/graph/validator/LookupValidator.cpp index c93a49e878c..cc2eeef06f7 100644 --- a/src/graph/validator/LookupValidator.cpp +++ b/src/graph/validator/LookupValidator.cpp @@ -231,12 +231,13 @@ StatusOr LookupValidator::handleLogicalExprOperands(LogicalExpressi auto& operands = lExpr->operands(); for (auto i = 0u; i < operands.size(); i++) { auto operand = lExpr->operand(i); - if (operand->isLogicalExpr()) { - // Not allow different logical expression to use: A AND B OR C - return Status::SemanticError("Not supported filter: %s", lExpr->toString().c_str()); - } + // if (operand->isLogicalExpr()) { + // // Not allow different logical expression to use: A AND B OR C + // return Status::SemanticError("Not supported filter: %s", lExpr->toString().c_str()); + // } auto ret = checkFilter(operand); NG_RETURN_IF_ERROR(ret); + auto newOperand = ret.value(); if (operand != newOperand) { lExpr->setOperand(i, newOperand); @@ -248,7 +249,9 @@ StatusOr LookupValidator::handleLogicalExprOperands(LogicalExpressi StatusOr LookupValidator::checkFilter(Expression* expr) { // TODO: Support IN expression push down if (expr->isRelExpr()) { - return checkRelExpr(static_cast(expr)); + auto relExpr = static_cast(expr); + NG_RETURN_IF_ERROR(checkRelExpr(relExpr)); + return rewriteRelExpr(relExpr); } switch (expr->kind()) { case Expression::Kind::kLogicalOr: { @@ -265,7 +268,7 @@ StatusOr LookupValidator::checkFilter(Expression* expr) { } } -StatusOr LookupValidator::checkRelExpr(RelationalExpression* expr) { +Status LookupValidator::checkRelExpr(RelationalExpression* expr) { auto* left = expr->left(); auto* right = expr->right(); // Does not support filter : schema.col1 > schema.col2 @@ -275,7 +278,7 @@ StatusOr LookupValidator::checkRelExpr(RelationalExpression* expr) } if (left->kind() == Expression::Kind::kLabelAttribute || right->kind() == Expression::Kind::kLabelAttribute) { - return rewriteRelExpr(expr); + return Status::OK(); } return Status::SemanticError("Expression %s not supported yet", expr->toString().c_str()); } @@ -283,42 +286,41 @@ StatusOr LookupValidator::checkRelExpr(RelationalExpression* expr) StatusOr LookupValidator::rewriteRelExpr(RelationalExpression* expr) { // swap LHS and RHS of relExpr if LabelAttributeExpr in on the right, // so that LabelAttributeExpr is always on the left - auto right = expr->right(); - if (right->kind() == Expression::Kind::kLabelAttribute) { + auto rightOperand = expr->right(); + if (rightOperand->kind() == Expression::Kind::kLabelAttribute) { expr = static_cast(reverseRelKind(expr)); } - auto left = expr->left(); - auto* la = static_cast(left); + auto leftOperand = expr->left(); + auto* la = static_cast(leftOperand); if (la->left()->name() != sentence()->from()) { return Status::SemanticError("Schema name error: %s", la->left()->name().c_str()); } // fold constant expression - auto pool = qctx_->objPool(); auto foldRes = ExpressionUtils::foldConstantExpr(expr); NG_RETURN_IF_ERROR(foldRes); expr = static_cast(foldRes.value()); DCHECK_EQ(expr->left()->kind(), Expression::Kind::kLabelAttribute); + // Check schema and value type std::string prop = la->right()->value().getStr(); auto relExprType = expr->kind(); auto c = checkConstExpr(expr->right(), prop, relExprType); NG_RETURN_IF_ERROR(c); - expr->setRight(ConstantExpression::make(pool, std::move(c).value())); + expr->setRight(std::move(c).value()); // rewrite PropertyExpression - if (lookupCtx_->isEdge) { - expr->setLeft(ExpressionUtils::rewriteLabelAttr2EdgeProp(la)); - } else { - expr->setLeft(ExpressionUtils::rewriteLabelAttr2TagProp(la)); - } + auto propExpr = lookupCtx_->isEdge ? ExpressionUtils::rewriteLabelAttr2EdgeProp(la) + : ExpressionUtils::rewriteLabelAttr2TagProp(la); + expr->setLeft(propExpr); return expr; } -StatusOr LookupValidator::checkConstExpr(Expression* expr, - const std::string& prop, - const Expression::Kind kind) { +StatusOr LookupValidator::checkConstExpr(Expression* expr, + const std::string& prop, + const Expression::Kind kind) { + auto* pool = expr->getObjPool(); if (!evaluableExpr(expr)) { return Status::SemanticError("'%s' is not an evaluable expression.", expr->toString().c_str()); } @@ -336,7 +338,7 @@ StatusOr LookupValidator::checkConstExpr(Expression* expr, // Allow different numeric type to compare if (graph::SchemaUtil::propTypeToValueType(type) == Value::Type::FLOAT && v.isInt()) { - return v.toFloat(); + return ConstantExpression::make(pool, v.toFloat()); } else if (graph::SchemaUtil::propTypeToValueType(type) == Value::Type::INT && v.isFloat()) { // col1 < 10.5 range: [min, 11), col1 < 10 range: [min, 10) double f = v.getFloat(); @@ -345,20 +347,21 @@ StatusOr LookupValidator::checkConstExpr(Expression* expr, if (kind == Expression::Kind::kRelGE || kind == Expression::Kind::kRelLT) { // edge case col1 >= 40.0, no need to round up if (std::abs(f - iCeil) < kEpsilon) { - return iFloor; + return ConstantExpression::make(pool, iFloor); } - return iCeil; + return ConstantExpression::make(pool, iCeil); } - return iFloor; + return ConstantExpression::make(pool, iFloor); } + // Check prop type if (v.type() != SchemaUtil::propTypeToValueType(type)) { // allow diffrent types in the IN expression, such as "abc" IN ["abc"] - if (v.type() != Value::Type::LIST) { + if (!expr->isContainerExpr()) { return Status::SemanticError("Column type error : %s", prop.c_str()); } } - return v; + return expr; } StatusOr LookupValidator::checkTSExpr(Expression* expr) { @@ -381,6 +384,7 @@ StatusOr LookupValidator::checkTSExpr(Expression* expr) { } return tsName; } + // Transform (A > B) to (B < A) Expression* LookupValidator::reverseRelKind(RelationalExpression* expr) { auto kind = expr->kind(); diff --git a/src/graph/validator/LookupValidator.h b/src/graph/validator/LookupValidator.h index 3b9c6ab715f..97c900d9d36 100644 --- a/src/graph/validator/LookupValidator.h +++ b/src/graph/validator/LookupValidator.h @@ -37,12 +37,11 @@ class LookupValidator final : public Validator { Status validateYieldEdge(); StatusOr checkFilter(Expression* expr); - StatusOr checkRelExpr(RelationalExpression* expr); + Status checkRelExpr(RelationalExpression* expr); StatusOr checkTSExpr(Expression* expr); - StatusOr checkConstExpr(Expression* expr, - const std::string& prop, - const Expression::Kind kind); - + StatusOr checkConstExpr(Expression* expr, + const std::string& prop, + const Expression::Kind kind); StatusOr rewriteRelExpr(RelationalExpression* expr); Expression* reverseRelKind(RelationalExpression* expr); diff --git a/src/graph/visitor/FoldConstantExprVisitor.cpp b/src/graph/visitor/FoldConstantExprVisitor.cpp index 93b0430b964..47c4af9c533 100644 --- a/src/graph/visitor/FoldConstantExprVisitor.cpp +++ b/src/graph/visitor/FoldConstantExprVisitor.cpp @@ -338,6 +338,11 @@ void FoldConstantExprVisitor::visitBinaryExpr(BinaryExpression *expr) { } Expression *FoldConstantExprVisitor::fold(Expression *expr) { + // Container expresison shuold remain the same type after being folded + if (expr->isContainerExpr()) { + return expr; + } + QueryExpressionContext ctx; auto value = expr->eval(ctx(nullptr)); if (value.type() == Value::Type::NULLVALUE) { diff --git a/src/graph/visitor/VidExtractVisitor.cpp b/src/graph/visitor/VidExtractVisitor.cpp index 66d8684a79e..22be58bbbcc 100644 --- a/src/graph/visitor/VidExtractVisitor.cpp +++ b/src/graph/visitor/VidExtractVisitor.cpp @@ -135,7 +135,7 @@ void VidExtractVisitor::visit(ArithmeticExpression *expr) { void VidExtractVisitor::visit(RelationalExpression *expr) { if (expr->kind() == Expression::Kind::kRelIn) { - // const auto *inExpr = static_cast(expr); + // id(V) IN [List] if (expr->left()->kind() == Expression::Kind::kLabelAttribute) { const auto *labelExpr = static_cast(expr->left()); const auto &label = labelExpr->left()->toString(); @@ -143,29 +143,29 @@ void VidExtractVisitor::visit(RelationalExpression *expr) { {{label, {VidPattern::Vids::Kind::kOtherSource, {}}}}}; return; } + if (expr->left()->kind() != Expression::Kind::kFunctionCall || - expr->right()->kind() != Expression::Kind::kConstant) { + expr->right()->kind() != Expression::Kind::kList) { vidPattern_ = VidPattern{}; return; } + const auto *fCallExpr = static_cast(expr->left()); if (fCallExpr->name() != "id" && fCallExpr->args()->numArgs() != 1 && fCallExpr->args()->args().front()->kind() != Expression::Kind::kLabel) { vidPattern_ = VidPattern{}; return; } - const auto *constExpr = static_cast(expr->right()); - if (constExpr->value().type() != Value::Type::LIST) { - vidPattern_ = VidPattern{}; - return; - } - vidPattern_ = VidPattern{VidPattern::Special::kInUsed, - {{fCallExpr->args()->args().front()->toString(), - {VidPattern::Vids::Kind::kIn, constExpr->value().getList()}}}}; + + auto *listExpr = static_cast(expr->right()); + QueryExpressionContext ctx; + vidPattern_ = + VidPattern{VidPattern::Special::kInUsed, + {{fCallExpr->args()->args().front()->toString(), + {VidPattern::Vids::Kind::kIn, listExpr->eval(ctx(nullptr)).getList()}}}}; return; } else if (expr->kind() == Expression::Kind::kRelEQ) { - // const auto *eqExpr = static_cast(expr); + // id(V) == vid if (expr->left()->kind() == Expression::Kind::kLabelAttribute) { const auto *labelExpr = static_cast(expr->left()); const auto &label = labelExpr->left()->toString(); diff --git a/src/graph/visitor/test/FoldConstantExprVisitorTest.cpp b/src/graph/visitor/test/FoldConstantExprVisitorTest.cpp index 7535592e920..43d4dd0399c 100644 --- a/src/graph/visitor/test/FoldConstantExprVisitorTest.cpp +++ b/src/graph/visitor/test/FoldConstantExprVisitorTest.cpp @@ -104,6 +104,8 @@ TEST_F(FoldConstantExprVisitorTest, TestListExpr) { expr->accept(&visitor); ASSERT_EQ(*expr, *expected) << expr->toString() << " vs. " << expected->toString(); ASSERT(visitor.canBeFolded()); + // type should remain the same after folding + ASSERT_EQ(expr->kind(), Expression::Kind::kList); } TEST_F(FoldConstantExprVisitorTest, TestSetExpr) { @@ -116,6 +118,8 @@ TEST_F(FoldConstantExprVisitorTest, TestSetExpr) { expr->accept(&visitor); ASSERT_EQ(*expr, *expected) << expr->toString() << " vs. " << expected->toString(); ASSERT(visitor.canBeFolded()); + // type should remain the same after folding + ASSERT_EQ(expr->kind(), Expression::Kind::kList); } TEST_F(FoldConstantExprVisitorTest, TestMapExpr) { @@ -131,6 +135,8 @@ TEST_F(FoldConstantExprVisitorTest, TestMapExpr) { expr->accept(&visitor); ASSERT_EQ(*expr, *expected) << expr->toString() << " vs. " << expected->toString(); ASSERT(visitor.canBeFolded()); + // type should remain the same after folding + ASSERT_EQ(expr->kind(), Expression::Kind::kList); } TEST_F(FoldConstantExprVisitorTest, TestCaseExpr) { diff --git a/tests/tck/features/lookup/EdgeIndexFullScan.feature b/tests/tck/features/lookup/EdgeIndexFullScan.feature index 293eaa339e3..d66094cb7f7 100644 --- a/tests/tck/features/lookup/EdgeIndexFullScan.feature +++ b/tests/tck/features/lookup/EdgeIndexFullScan.feature @@ -118,11 +118,10 @@ Feature: Lookup edge index full scan | SrcVID | DstVID | Ranking | edge_1.col1_str | | "102" | "103" | 0 | "Yellow" | And the execution plan should be: - | id | name | dependencies | operator info | - | 3 | Project | 2 | | - | 2 | Filter | 4 | {"condition": "(edge_1.col1_str IN [\"Red\",\"Yellow\"])"} | - | 4 | EdgeIndexFullScan | 0 | | - | 0 | Start | | | + | id | name | dependencies | operator info | + | 3 | Project | 4 | | + | 4 | IndexScan | 0 | | + | 0 | Start | | | When executing query: """ LOOKUP ON edge_1 WHERE edge_1.col1_str IN ["non-existed-name"] YIELD edge_1.col1_str @@ -138,11 +137,91 @@ Feature: Lookup edge index full scan | "103" | "101" | 0 | 33 | | "102" | "103" | 0 | 22 | And the execution plan should be: - | id | name | dependencies | operator info | - | 3 | Project | 2 | | - | 2 | Filter | 4 | {"condition": "(edge_1.col2_int IN [22,33])"} | - | 4 | EdgeIndexFullScan | 0 | | - | 0 | Start | | | + | id | name | dependencies | operator info | + | 3 | Project | 4 | | + | 4 | IndexScan | 0 | | + | 0 | Start | | | + # a IN b OR c + When profiling query: + """ + LOOKUP ON edge_1 + WHERE edge_1.col2_int IN [23 - 1 , 66/2] OR edge_1.col2_int==11 + YIELD edge_1.col2_int + """ + Then the result should be, in any order: + | SrcVID | DstVID | Ranking | edge_1.col2_int | + | "101" | "102" | 0 | 11 | + | "102" | "103" | 0 | 22 | + | "103" | "101" | 0 | 33 | + And the execution plan should be: + | id | name | dependencies | operator info | + | 3 | Project | 4 | | + | 4 | IndexScan | 0 | | + | 0 | Start | | | + # a IN b OR c IN d + When profiling query: + """ + LOOKUP ON edge_1 + WHERE edge_1.col2_int IN [23 - 1 , 66/2] OR edge_1.col1_str IN [toUpper("r")+"ed1"] + YIELD edge_1.col1_str, edge_1.col2_int + """ + Then the result should be, in any order: + | SrcVID | DstVID | Ranking | edge_1.col1_str | edge_1.col2_int | + | "101" | "102" | 0 | "Red1" | 11 | + | "102" | "103" | 0 | "Yellow" | 22 | + | "103" | "101" | 0 | "Blue" | 33 | + And the execution plan should be: + | id | name | dependencies | operator info | + | 3 | Project | 4 | | + | 4 | IndexScan | 0 | | + | 0 | Start | | | + # a IN b AND c (EdgeIndexPrefixScan) + When profiling query: + """ + LOOKUP ON edge_1 + WHERE edge_1.col2_int IN [11 , 66/2] AND edge_1.col2_int==11 + YIELD edge_1.col2_int + """ + Then the result should be, in any order: + | SrcVID | DstVID | Ranking | edge_1.col2_int | + | "101" | "102" | 0 | 11 | + And the execution plan should be: + | id | name | dependencies | operator info | + | 3 | Project | 4 | | + | 4 | EdgeIndexPrefixScan | 0 | | + | 0 | Start | | | + # a IN b AND c IN d + # List has only 1 element, so prefixScan is applied + When profiling query: + """ + LOOKUP ON edge_1 + WHERE edge_1.col2_int IN [11 , 66/2] AND edge_1.col1_str IN [toUpper("r")+"ed1"] + YIELD edge_1.col1_str, edge_1.col2_int + """ + Then the result should be, in any order: + | SrcVID | DstVID | Ranking | edge_1.col1_str | edge_1.col2_int | + | "101" | "102" | 0 | "Red1" | 11 | + And the execution plan should be: + | id | name | dependencies | operator info | + | 3 | Project | 4 | | + | 4 | EdgeIndexPrefixScan | 0 | | + | 0 | Start | | | + # a IN b AND c IN d (EdgeIndexFullScan) + When profiling query: + """ + LOOKUP ON edge_1 + WHERE edge_1.col2_int IN [11 , 66/2] AND edge_1.col1_str IN [toUpper("r")+"ed1", "ABC"] + YIELD edge_1.col1_str, edge_1.col2_int + """ + Then the result should be, in any order: + | SrcVID | DstVID | Ranking | edge_1.col1_str | edge_1.col2_int | + | "101" | "102" | 0 | "Red1" | 11 | + And the execution plan should be: + | id | name | dependencies | operator info | + | 3 | Project | 2 | | + | 2 | Filter | 4 | {"condition": "(((edge_1.col2_int==11) OR (edge_1.col2_int==33)) AND ((edge_1.col1_str==\"Red1\") OR (edge_1.col1_str==\"ABC\")))"} | + | 4 | EdgeIndexFullScan | 0 | | + | 0 | Start | | | When profiling query: """ LOOKUP ON edge_1 WHERE edge_1.col1_str NOT IN ["Blue"] YIELD edge_1.col1_str diff --git a/tests/tck/features/lookup/TagIndexFullScan.feature b/tests/tck/features/lookup/TagIndexFullScan.feature index 22fb26b9738..28e2be20f3f 100644 --- a/tests/tck/features/lookup/TagIndexFullScan.feature +++ b/tests/tck/features/lookup/TagIndexFullScan.feature @@ -1,3 +1,4 @@ +@aiee Feature: Lookup tag index full scan Background: @@ -92,6 +93,7 @@ Feature: Lookup tag index full scan | 4 | TagIndexFullScan | 0 | | | 0 | Start | | | + # TODO: Support compare operator info that has multiple column hints Scenario: Tag with relational IN/NOT IN filter When profiling query: """ @@ -102,11 +104,10 @@ Feature: Lookup tag index full scan | "Jazz" | | "Hornets" | And the execution plan should be: - | id | name | dependencies | operator info | - | 3 | Project | 2 | | - | 2 | Filter | 4 | {"condition": "(team.name IN [\"Hornets\",\"Jazz\"])"} | - | 4 | TagIndexFullScan | 0 | | - | 0 | Start | | | + | id | name | dependencies | operator info | + | 3 | Project | 4 | | + | 4 | IndexScan | 0 | | + | 0 | Start | | | When executing query: """ LOOKUP ON team WHERE team.name IN ["non-existed-name"] @@ -124,11 +125,69 @@ Feature: Lookup tag index full scan | "Tony Parker" | 36 | | "Boris Diaw" | 36 | And the execution plan should be: - | id | name | dependencies | operator info | - | 3 | Project | 2 | | - | 2 | Filter | 4 | {"condition": "(player.age IN [39,36])"} | - | 4 | TagIndexFullScan | 0 | | - | 0 | Start | | | + | id | name | dependencies | operator info | + | 3 | Project | 4 | | + | 4 | IndexScan | 0 | | + | 0 | Start | | | + # a IN b OR c + When profiling query: + """ + LOOKUP ON player WHERE player.age IN [40, 25] OR player.name == "ABC" YIELD player.age + """ + Then the result should be, in any order: + | VertexID | player.age | + | "Dirk Nowitzki" | 40 | + | "Joel Embiid" | 25 | + | "Kobe Bryant" | 40 | + | "Kyle Anderson" | 25 | + And the execution plan should be: + | id | name | dependencies | operator info | + | 3 | Project | 4 | | + | 4 | IndexScan | 0 | | + | 0 | Start | | | + # a IN b OR c IN d + When profiling query: + """ + LOOKUP ON player WHERE player.age IN [40, 25] OR player.name IN ["Kobe Bryant"] YIELD player.age + """ + Then the result should be, in any order: + | VertexID | player.age | + | "Dirk Nowitzki" | 40 | + | "Joel Embiid" | 25 | + | "Kobe Bryant" | 40 | + | "Kyle Anderson" | 25 | + And the execution plan should be: + | id | name | dependencies | operator info | + | 3 | Project | 4 | | + | 4 | IndexScan | 0 | | + | 0 | Start | | | + # a IN b AND c + When profiling query: + """ + LOOKUP ON player WHERE player.age IN [40, 25] AND player.name == "Kobe Bryant" YIELD player.age + """ + Then the result should be, in any order: + | VertexID | player.age | + | "Kobe Bryant" | 40 | + And the execution plan should be: + | id | name | dependencies | operator info | + | 3 | Project | 4 | | + | 4 | TagIndexPrefixScan | 0 | {"indexCtx": {"columnHints":{"endValue":"__EMPTY__","beginValue":"\"Kobe Bryant","scanType":"PREFIX","column":"name"}}} | + | 0 | Start | | | + # a IN b AND c IN d (TagIndexFullScan) + When profiling query: + """ + LOOKUP ON player WHERE player.age IN [40, 25] AND player.name IN ["ABC", "Kobe Bryant"] YIELD player.age + """ + Then the result should be, in any order: + | VertexID | player.age | + | "Kobe Bryant" | 40 | + And the execution plan should be: + | id | name | dependencies | operator info | + | 3 | Project | 2 | | + | 2 | Filter | 4 | {"condition": "(((player.age==40) OR (player.age==25)) AND ((player.name==\"ABC\") OR (player.name==\"Kobe Bryant\")))"} | + | 4 | TagIndexFullScan | 0 | | + | 0 | Start | | | When profiling query: """ LOOKUP ON team WHERE team.name NOT IN ["Hornets", "Jazz"] From 2a6358e7ad7eb4d0520681ef3a15cf481aeeeee3 Mon Sep 17 00:00:00 2001 From: Aiee <18348405+Aiee@users.noreply.github.com> Date: Sat, 18 Sep 2021 21:22:03 +0800 Subject: [PATCH 2/8] Add method to convert AND expr to OR expr --- src/graph/optimizer/OptimizerUtils.cpp | 4 - .../rule/OptimizeTagIndexScanByFilterRule.cpp | 51 ++++- .../rule/UnionAllIndexScanBaseRule.cpp | 82 ++++--- src/graph/util/ExpressionUtils.cpp | 71 +++++- src/graph/util/ExpressionUtils.h | 9 + .../features/lookup/TagIndexFullScan.feature | 210 +++++++++--------- 6 files changed, 270 insertions(+), 157 deletions(-) diff --git a/src/graph/optimizer/OptimizerUtils.cpp b/src/graph/optimizer/OptimizerUtils.cpp index 87003ed97de..fb01ccbf519 100644 --- a/src/graph/optimizer/OptimizerUtils.cpp +++ b/src/graph/optimizer/OptimizerUtils.cpp @@ -665,10 +665,6 @@ StatusOr selectRelExprIndex(const ColumnDef& field, hint.score = IndexScore::kNotEqual; break; } - case Expression::Kind::kRelIn: { - // check the property has an index - break; - } default: { return Status::Error("Invalid expression kind"); } diff --git a/src/graph/optimizer/rule/OptimizeTagIndexScanByFilterRule.cpp b/src/graph/optimizer/rule/OptimizeTagIndexScanByFilterRule.cpp index ecdec624926..0f4edab4a3d 100644 --- a/src/graph/optimizer/rule/OptimizeTagIndexScanByFilterRule.cpp +++ b/src/graph/optimizer/rule/OptimizeTagIndexScanByFilterRule.cpp @@ -47,8 +47,14 @@ const Pattern& OptimizeTagIndexScanByFilterRule::pattern() const { } // Match 2 kinds of expressions: -// 1. Relational expr -// 2. Logical AND expr +// +// 1. Relational expr. If it is an IN expr, its list MUST have only 1 element, so it could always be +// transformed to an relEQ expr. i.g. A in [B] => A == B +// It the list has more than 1 element, the expr will be matched with UnionAllIndexScanBaseRule. +// +// 2. Logical AND expr. If the AND expr contains an operand that is an IN expr, the label attribute +// in the IN expr SHUOLD NOT have a valid index, otherwise the expression should be matched with +// UnionAllIndexScanBaseRule. bool OptimizeTagIndexScanByFilterRule::match(OptContext* ctx, const MatchedResult& matched) const { if (!OptRule::match(ctx, matched)) { return false; @@ -61,6 +67,8 @@ bool OptimizeTagIndexScanByFilterRule::match(OptContext* ctx, const MatchedResul } } auto condition = filter->condition(); + + // Case1: relational expr if (condition->isRelExpr()) { auto relExpr = static_cast(condition); // If the container in the IN expr has only 1 element, it will be converted to an relEQ @@ -73,10 +81,16 @@ bool OptimizeTagIndexScanByFilterRule::match(OptContext* ctx, const MatchedResul return relExpr->left()->kind() == ExprKind::kTagProperty && relExpr->right()->kind() == ExprKind::kConstant; } - if (condition->isLogicalExpr()) { - return condition->kind() == ExprKind::kLogicalAnd; - } + // Case2: logical AND expr + if (condition->kind() == ExprKind::kLogicalAnd) { + // for (auto operand : static_cast(condition)->operands()) { + // if (operand->kind() == ExprKind::kRelIn) { + // return false; + // } + // } + return true; + } return false; } @@ -108,16 +122,31 @@ StatusOr OptimizeTagIndexScanByFilterRule::transform( auto conditionType = condition->kind(); Expression* transformedExpr = condition->clone(); - // Stand alone IN expr + // Stand alone IN expr with only 1 element in the list, no need to check index if (conditionType == ExprKind::kRelIn) { - if (!OptimizerUtils::relExprHasIndex(condition, indexItems)) { - return TransformResult::noTransform(); - } transformedExpr = graph::ExpressionUtils::rewriteInExpr(condition); + DCHECK(transformedExpr->kind() == ExprKind::kRelEQ); + } + + // case2: logical AND expr + if (condition->kind() == ExprKind::kLogicalAnd) { + for (auto& operand : static_cast(condition)->operands()) { + if (operand->kind() == ExprKind::kRelIn) { + auto inExpr = static_cast(operand); + // Do not apply this rule if the IN expr has a valid index or it has only 1 element in the + // list + if (static_cast(inExpr->right())->size() > 1) { + return TransformResult::noTransform(); + } else { + transformedExpr = graph::ExpressionUtils::rewriteInExpr(condition); + } + if (OptimizerUtils::relExprHasIndex(inExpr, indexItems)) { + return TransformResult::noTransform(); + } + } + } } - DCHECK(transformedExpr->kind() == ExprKind::kLogicalOr || - transformedExpr->kind() == ExprKind::kRelEQ); IndexQueryContext ictx; bool isPrefixScan = false; if (!OptimizerUtils::findOptimalIndex(transformedExpr, indexItems, &isPrefixScan, &ictx)) { diff --git a/src/graph/optimizer/rule/UnionAllIndexScanBaseRule.cpp b/src/graph/optimizer/rule/UnionAllIndexScanBaseRule.cpp index e2f223861c6..79814487bf2 100644 --- a/src/graph/optimizer/rule/UnionAllIndexScanBaseRule.cpp +++ b/src/graph/optimizer/rule/UnionAllIndexScanBaseRule.cpp @@ -33,10 +33,15 @@ namespace opt { // The matched expression should be either a OR expression or an expression that could be // rewrote to a OR expression. There are 3 senarios. -// 1. OR expr. If OR expr has IN expr operand that has a valid index, expand it to OR expr. -// 2. AND expr such as A in [a, b] AND B, because it can be transformed to (A==a AND B) OR (A==b -// AND B) -// 3. IN expr such as A in [a, b] since it can be transformed to (A==a) OR (A==b) +// +// 1. OR expr. If OR expr has an IN expr operand that has a valid index, expand it to OR expr. +// +// 2. AND expr such as A in [a, b] AND B when A has a valid index, because it can be transformed to +// (A==a AND B) OR (A==b AND B) +// +// 3. IN expr with its list size > 1, such as A in [a, b] since it can be transformed to (A==a) OR +// (A==b). +// If the list has a size of 1, the expr will be matched with OptimizeTagIndexScanByFilterRule. bool UnionAllIndexScanBaseRule::match(OptContext* ctx, const MatchedResult& matched) const { if (!OptRule::match(ctx, matched)) { return false; @@ -47,9 +52,11 @@ bool UnionAllIndexScanBaseRule::match(OptContext* ctx, const MatchedResult& matc auto conditionType = condition->kind(); if (condition->isLogicalExpr()) { + // Case1: OR Expr if (conditionType == ExprKind::kLogicalOr) { return true; } + // Case2: AND Expr if (conditionType == ExprKind::kLogicalAnd && graph::ExpressionUtils::findAny(static_cast(condition), {ExprKind::kRelIn})) { @@ -82,7 +89,6 @@ bool UnionAllIndexScanBaseRule::match(OptContext* ctx, const MatchedResult& matc return false; } -// If the IN expr has only 1 element in its container, it will be converted to an relEQ expr StatusOr UnionAllIndexScanBaseRule::transform(OptContext* ctx, const MatchedResult& matched) const { auto filter = static_cast(matched.planNode()); @@ -109,38 +115,19 @@ StatusOr UnionAllIndexScanBaseRule::transform(OptContext* ctx, auto conditionType = condition->kind(); Expression* transformedExpr = condition->clone(); - // Stand alone IN expr - if (conditionType == ExprKind::kRelIn) { - if (!OptimizerUtils::relExprHasIndex(condition, indexItems)) { - return TransformResult::noTransform(); - } - transformedExpr = graph::ExpressionUtils::rewriteInExpr(condition); - } - - // AND expr containing IN expr operand - if (conditionType == ExprKind::kLogicalAnd) { - auto relInExprs = graph::ExpressionUtils::collectAll(transformedExpr, {ExprKind::kRelIn}); - DCHECK(!relInExprs.empty()); - bool indexFound = false; - // Iterate all operands and expand IN exprs if possible - for (auto& expr : static_cast(transformedExpr)->operands()) { - if (expr->kind() == ExprKind::kRelIn) { - if (OptimizerUtils::relExprHasIndex(transformedExpr, indexItems)) { - expr = graph::ExpressionUtils::rewriteInExpr(expr); - } + switch (conditionType) { + // Stand alone IN expr + // If it has multiple elements in the list, check valid index before expanding to OR expr + case ExprKind::kRelIn: { + if (!OptimizerUtils::relExprHasIndex(condition, indexItems)) { + return TransformResult::noTransform(); } - } - if (!indexFound) { - return TransformResult::noTransform(); + transformedExpr = graph::ExpressionUtils::rewriteInExpr(condition); + break; } - // Reconstruct AND expr using distributive law - } - - // OR expr - if (conditionType == ExprKind::kLogicalOr) { - auto relInExprs = graph::ExpressionUtils::collectAll(transformedExpr, {ExprKind::kRelIn}); - if (!relInExprs.empty()) { + // AND expr containing IN expr operand + case ExprKind::kLogicalAnd: { // Iterate all operands and expand IN exprs if possible for (auto& expr : static_cast(transformedExpr)->operands()) { if (expr->kind() == ExprKind::kRelIn) { @@ -149,9 +136,32 @@ StatusOr UnionAllIndexScanBaseRule::transform(OptContext* ctx, } } } - // Flatten OR exprs - graph::ExpressionUtils::pullOrs(transformedExpr); + + // Reconstruct AND expr using distributive law + transformedExpr = graph::ExpressionUtils::rewriteLogicalAndToLogicalOr(transformedExpr); + break; + } + + // OR expr + case ExprKind::kLogicalOr: { + auto relInExprs = graph::ExpressionUtils::collectAll(transformedExpr, {ExprKind::kRelIn}); + if (!relInExprs.empty()) { + // Iterate all operands and expand IN exprs if possible + for (auto& expr : static_cast(transformedExpr)->operands()) { + if (expr->kind() == ExprKind::kRelIn) { + if (OptimizerUtils::relExprHasIndex(expr, indexItems)) { + expr = graph::ExpressionUtils::rewriteInExpr(expr); + } + } + } + // Flatten OR exprs + graph::ExpressionUtils::pullOrs(transformedExpr); + } + break; } + default: + LOG(FATAL) << "Invalid expression kind: " << static_cast(conditionType); + break; } DCHECK(transformedExpr->kind() == ExprKind::kLogicalOr || diff --git a/src/graph/util/ExpressionUtils.cpp b/src/graph/util/ExpressionUtils.cpp index c0ee3605ac0..1a75fb5f0b2 100644 --- a/src/graph/util/ExpressionUtils.cpp +++ b/src/graph/util/ExpressionUtils.cpp @@ -64,6 +64,22 @@ std::vector ExpressionUtils::collectAll( return std::move(visitor).results(); } +// Find all expression except fot the specified kind +// Empty for not found any one +std::vector ExpressionUtils::collectAllExcept( + const Expression *self, const std::unordered_set &excludedKind) { + auto finder = [&excludedKind](const Expression *expr) -> bool { + // return true if the current expr kind is in the excludedKind list + if (excludedKind.find(expr->kind()) == excludedKind.end()) { + return true; + } + return false; + }; + FindVisitor visitor(finder, true); + const_cast(self)->accept(&visitor); + return std::move(visitor).results(); +} + std::vector ExpressionUtils::findAllStorage(const Expression *expr) { return collectAll(expr, {Expression::Kind::kTagProperty, @@ -171,7 +187,7 @@ Expression *ExpressionUtils::rewriteAgg2VarProp(const Expression *expr) { } // Rewrite the IN expr to a relEQ expr if the right operand has only 1 element. -// Rewrite the IN expr to a OR expr if the right operand has more than 1 element. +// Rewrite the IN expr to an OR expr if the right operand has more than 1 element. Expression *ExpressionUtils::rewriteInExpr(const Expression *expr) { DCHECK(expr->kind() == Expression::Kind::kRelIn); auto pool = expr->getObjPool(); @@ -196,6 +212,59 @@ Expression *ExpressionUtils::rewriteInExpr(const Expression *expr) { return orExpr; } +Expression *ExpressionUtils::rewriteLogicalAndToLogicalOr(const Expression *expr) { + DCHECK(expr->kind() == Expression::Kind::kLogicalAnd); + auto pool = expr->getObjPool(); + auto logicalAndExpr = static_cast(expr->clone()); + + // Extract all OR expr + auto orExprList = collectAll(logicalAndExpr, {Expression::Kind::kLogicalOr}); + auto nonOrExprList = collectAllExcept(logicalAndExpr, {Expression::Kind::kLogicalOr}); + + DCHECK_GT(orExprList.size(), 1); + std::vector> orExprOperands; + orExprOperands.reserve(orExprList.size()); + // orExprOperands.emplace_back(std::move(orExprList[0])); + + // Merge the elements of vec2 into each subVec of vec1 + // [[A], [B]] and [C, D] => [[A, C], [A, D], [B, C], [B,D]] + auto mergeVecs = [](std::vector> &vec1, + const std::vector vec2) { + std::vector> res; + for (auto &ele1 : vec1) { + for (const auto &ele2 : vec2) { + auto tempSubVec = ele1; + tempSubVec.emplace_back(std::move(ele2)); + res.emplace_back(std::move(tempSubVec)); + } + } + return res; + }; + + // Iterate all OR exprs + for (auto curExpr : orExprList) { + auto curLogicalOrExpr = static_cast(const_cast(curExpr)); + // auto tempExpr = LogicalExpression::makeAnd(pool); + + auto curOrOperands = curLogicalOrExpr->operands(); + orExprOperands = mergeVecs(orExprOperands, curOrOperands); + } + + // orExprOperands is a 2D vecter where each sub-vecter is the operands of AND expression. + // [[A, C], [A, D], [B, C], [B,D]] => (A and C) or (A and D) or (B and C) or (B or D) + std::vector andExprList; + andExprList.reserve(orExprOperands.size()); + for (const auto &operand : orExprOperands) { + auto andExpr = LogicalExpression::makeAnd(pool); + andExpr->setOperands(operand); + andExprList.emplace_back(std::move(andExpr)); + } + + auto orExpr = LogicalExpression::makeOr(pool); + orExpr->setOperands(andExprList); + return orExpr; +} + std::vector ExpressionUtils::getContainerExprOperands(const Expression *expr) { DCHECK(expr->isContainerExpr()); auto pool = expr->getObjPool(); diff --git a/src/graph/util/ExpressionUtils.h b/src/graph/util/ExpressionUtils.h index a921da4fe52..94c9aaf9dc0 100644 --- a/src/graph/util/ExpressionUtils.h +++ b/src/graph/util/ExpressionUtils.h @@ -45,6 +45,9 @@ class ExpressionUtils { static std::vector collectAll( const Expression* self, const std::unordered_set& expected); + static std::vector collectAllExcept( + const Expression* self, const std::unordered_set& excludedKind); + static std::vector findAllStorage(const Expression* expr); static std::vector findAllInputVariableProp(const Expression* expr); @@ -69,6 +72,12 @@ class ExpressionUtils { // Rewrite IN expression into OR expression or relEQ expression static Expression* rewriteInExpr(const Expression* expr); + // Rewrite Logical AND expr to Logical OR expr using distributive law + // Examples: + // A and (B or C) => (A and B) or (A and C) + // (A or B) and (C or D) => (A and C) or (A and D) or (B and C) or (B or D) + static Expression* rewriteLogicalAndToLogicalOr(const Expression* expr); + // Return the operands of container expressions // For list and set, return the operands // For map, return the keys diff --git a/tests/tck/features/lookup/TagIndexFullScan.feature b/tests/tck/features/lookup/TagIndexFullScan.feature index 28e2be20f3f..9e6583aaf47 100644 --- a/tests/tck/features/lookup/TagIndexFullScan.feature +++ b/tests/tck/features/lookup/TagIndexFullScan.feature @@ -1,4 +1,3 @@ -@aiee Feature: Lookup tag index full scan Background: @@ -94,6 +93,7 @@ Feature: Lookup tag index full scan | 0 | Start | | | # TODO: Support compare operator info that has multiple column hints + @aiee Scenario: Tag with relational IN/NOT IN filter When profiling query: """ @@ -188,110 +188,110 @@ Feature: Lookup tag index full scan | 2 | Filter | 4 | {"condition": "(((player.age==40) OR (player.age==25)) AND ((player.name==\"ABC\") OR (player.name==\"Kobe Bryant\")))"} | | 4 | TagIndexFullScan | 0 | | | 0 | Start | | | - When profiling query: - """ - LOOKUP ON team WHERE team.name NOT IN ["Hornets", "Jazz"] - """ - Then the result should be, in any order: - | VertexID | - | "76ers" | - | "Bucks" | - | "Bulls" | - | "Cavaliers" | - | "Celtics" | - | "Clippers" | - | "Grizzlies" | - | "Hawks" | - | "Heat" | - | "Wizards" | - | "Warriors" | - | "Kings" | - | "Knicks" | - | "Lakers" | - | "Magic" | - | "Mavericks" | - | "Nets" | - | "Nuggets" | - | "Pacers" | - | "Pelicans" | - | "Pistons" | - | "Raptors" | - | "Rockets" | - | "Spurs" | - | "Suns" | - | "Thunders" | - | "Timberwolves" | - | "Trail Blazers" | - And the execution plan should be: - | id | name | dependencies | operator info | - | 3 | Project | 2 | | - | 2 | Filter | 4 | {"condition": "(team.name NOT IN [\"Hornets\",\"Jazz\"])"} | - | 4 | TagIndexFullScan | 0 | | - | 0 | Start | | | - When profiling query: - """ - LOOKUP ON player WHERE player.age NOT IN [40 - 1 , 72/2] YIELD player.age - """ - Then the result should be, in any order: - | VertexID | player.age | - | "Yao Ming" | 38 | - | "Aron Baynes" | 32 | - | "Ben Simmons" | 22 | - | "Blake Griffin" | 30 | - | "Vince Carter" | 42 | - | "Carmelo Anthony" | 34 | - | "Chris Paul" | 33 | - | "Cory Joseph" | 27 | - | "Damian Lillard" | 28 | - | "Danny Green" | 31 | - | "David West" | 38 | - | "DeAndre Jordan" | 30 | - | "Dejounte Murray" | 29 | - | "Dirk Nowitzki" | 40 | - | "Dwight Howard" | 33 | - | "Dwyane Wade" | 37 | - | "Giannis Antetokounmpo" | 24 | - | "Grant Hill" | 46 | - | "JaVale McGee" | 31 | - | "James Harden" | 29 | - | "Jason Kidd" | 45 | - | "Joel Embiid" | 25 | - | "Jonathon Simmons" | 29 | - | "Kevin Durant" | 30 | - | "Klay Thompson" | 29 | - | "Kobe Bryant" | 40 | - | "Kristaps Porzingis" | 23 | - | "Kyle Anderson" | 25 | - | "Kyrie Irving" | 26 | - | "LaMarcus Aldridge" | 33 | - | "LeBron James" | 34 | - | "Luka Doncic" | 20 | - | "Manu Ginobili" | 41 | - | "Marc Gasol" | 34 | - | "Marco Belinelli" | 32 | - | "Nobody" | 0 | - | "Null1" | -1 | - | "Null2" | -2 | - | "Null3" | -3 | - | "Null4" | -4 | - | "Paul Gasol" | 38 | - | "Paul George" | 28 | - | "Rajon Rondo" | 33 | - | "Ray Allen" | 43 | - | "Ricky Rubio" | 28 | - | "Rudy Gay" | 32 | - | "Russell Westbrook" | 30 | - | "Shaquile O'Neal" | 47 | - | "Stephen Curry" | 31 | - | "Steve Nash" | 45 | - | "Tiago Splitter" | 34 | - | "Tim Duncan" | 42 | - And the execution plan should be: - | id | name | dependencies | operator info | - | 3 | Project | 2 | | - | 2 | Filter | 4 | {"condition": "(player.age NOT IN [39,36])"} | - | 4 | TagIndexFullScan | 0 | | - | 0 | Start | | | + # When profiling query: + # """ + # LOOKUP ON team WHERE team.name NOT IN ["Hornets", "Jazz"] + # """ + # Then the result should be, in any order: + # | VertexID | + # | "76ers" | + # | "Bucks" | + # | "Bulls" | + # | "Cavaliers" | + # | "Celtics" | + # | "Clippers" | + # | "Grizzlies" | + # | "Hawks" | + # | "Heat" | + # | "Wizards" | + # | "Warriors" | + # | "Kings" | + # | "Knicks" | + # | "Lakers" | + # | "Magic" | + # | "Mavericks" | + # | "Nets" | + # | "Nuggets" | + # | "Pacers" | + # | "Pelicans" | + # | "Pistons" | + # | "Raptors" | + # | "Rockets" | + # | "Spurs" | + # | "Suns" | + # | "Thunders" | + # | "Timberwolves" | + # | "Trail Blazers" | + # And the execution plan should be: + # | id | name | dependencies | operator info | + # | 3 | Project | 2 | | + # | 2 | Filter | 4 | {"condition": "(team.name NOT IN [\"Hornets\",\"Jazz\"])"} | + # | 4 | TagIndexFullScan | 0 | | + # | 0 | Start | | | + # When profiling query: + # """ + # LOOKUP ON player WHERE player.age NOT IN [40 - 1 , 72/2] YIELD player.age + # """ + # Then the result should be, in any order: + # | VertexID | player.age | + # | "Yao Ming" | 38 | + # | "Aron Baynes" | 32 | + # | "Ben Simmons" | 22 | + # | "Blake Griffin" | 30 | + # | "Vince Carter" | 42 | + # | "Carmelo Anthony" | 34 | + # | "Chris Paul" | 33 | + # | "Cory Joseph" | 27 | + # | "Damian Lillard" | 28 | + # | "Danny Green" | 31 | + # | "David West" | 38 | + # | "DeAndre Jordan" | 30 | + # | "Dejounte Murray" | 29 | + # | "Dirk Nowitzki" | 40 | + # | "Dwight Howard" | 33 | + # | "Dwyane Wade" | 37 | + # | "Giannis Antetokounmpo" | 24 | + # | "Grant Hill" | 46 | + # | "JaVale McGee" | 31 | + # | "James Harden" | 29 | + # | "Jason Kidd" | 45 | + # | "Joel Embiid" | 25 | + # | "Jonathon Simmons" | 29 | + # | "Kevin Durant" | 30 | + # | "Klay Thompson" | 29 | + # | "Kobe Bryant" | 40 | + # | "Kristaps Porzingis" | 23 | + # | "Kyle Anderson" | 25 | + # | "Kyrie Irving" | 26 | + # | "LaMarcus Aldridge" | 33 | + # | "LeBron James" | 34 | + # | "Luka Doncic" | 20 | + # | "Manu Ginobili" | 41 | + # | "Marc Gasol" | 34 | + # | "Marco Belinelli" | 32 | + # | "Nobody" | 0 | + # | "Null1" | -1 | + # | "Null2" | -2 | + # | "Null3" | -3 | + # | "Null4" | -4 | + # | "Paul Gasol" | 38 | + # | "Paul George" | 28 | + # | "Rajon Rondo" | 33 | + # | "Ray Allen" | 43 | + # | "Ricky Rubio" | 28 | + # | "Rudy Gay" | 32 | + # | "Russell Westbrook" | 30 | + # | "Shaquile O'Neal" | 47 | + # | "Stephen Curry" | 31 | + # | "Steve Nash" | 45 | + # | "Tiago Splitter" | 34 | + # | "Tim Duncan" | 42 | + # And the execution plan should be: + # | id | name | dependencies | operator info | + # | 3 | Project | 2 | | + # | 2 | Filter | 4 | {"condition": "(player.age NOT IN [39,36])"} | + # | 4 | TagIndexFullScan | 0 | | + # | 0 | Start | | | Scenario: Tag with relational CONTAINS/NOT CONTAINS filter When profiling query: From 219f6042c04ba651409d376458fb8580367ce2d1 Mon Sep 17 00:00:00 2001 From: Aiee <18348405+Aiee@users.noreply.github.com> Date: Sun, 19 Sep 2021 01:17:54 +0800 Subject: [PATCH 3/8] Fix rewriteLogicalAndToLogicalOr() and clear tests --- .../rule/UnionAllIndexScanBaseRule.cpp | 18 +- src/graph/util/ExpressionUtils.cpp | 53 ++-- src/graph/util/ExpressionUtils.h | 3 - src/graph/visitor/FindVisitor.cpp | 9 + src/graph/visitor/FindVisitor.h | 1 + .../features/lookup/TagIndexFullScan.feature | 228 +++++++++--------- 6 files changed, 158 insertions(+), 154 deletions(-) diff --git a/src/graph/optimizer/rule/UnionAllIndexScanBaseRule.cpp b/src/graph/optimizer/rule/UnionAllIndexScanBaseRule.cpp index 79814487bf2..2333ba672f4 100644 --- a/src/graph/optimizer/rule/UnionAllIndexScanBaseRule.cpp +++ b/src/graph/optimizer/rule/UnionAllIndexScanBaseRule.cpp @@ -144,19 +144,17 @@ StatusOr UnionAllIndexScanBaseRule::transform(OptContext* ctx, // OR expr case ExprKind::kLogicalOr: { - auto relInExprs = graph::ExpressionUtils::collectAll(transformedExpr, {ExprKind::kRelIn}); - if (!relInExprs.empty()) { - // Iterate all operands and expand IN exprs if possible - for (auto& expr : static_cast(transformedExpr)->operands()) { - if (expr->kind() == ExprKind::kRelIn) { - if (OptimizerUtils::relExprHasIndex(expr, indexItems)) { - expr = graph::ExpressionUtils::rewriteInExpr(expr); - } + // Iterate all operands and expand IN exprs if possible + for (auto& expr : static_cast(transformedExpr)->operands()) { + if (expr->kind() == ExprKind::kRelIn) { + if (OptimizerUtils::relExprHasIndex(expr, indexItems)) { + expr = graph::ExpressionUtils::rewriteInExpr(expr); } } - // Flatten OR exprs - graph::ExpressionUtils::pullOrs(transformedExpr); } + // Flatten OR exprs + graph::ExpressionUtils::pullOrs(transformedExpr); + break; } default: diff --git a/src/graph/util/ExpressionUtils.cpp b/src/graph/util/ExpressionUtils.cpp index 1a75fb5f0b2..240513c9e92 100644 --- a/src/graph/util/ExpressionUtils.cpp +++ b/src/graph/util/ExpressionUtils.cpp @@ -64,22 +64,6 @@ std::vector ExpressionUtils::collectAll( return std::move(visitor).results(); } -// Find all expression except fot the specified kind -// Empty for not found any one -std::vector ExpressionUtils::collectAllExcept( - const Expression *self, const std::unordered_set &excludedKind) { - auto finder = [&excludedKind](const Expression *expr) -> bool { - // return true if the current expr kind is in the excludedKind list - if (excludedKind.find(expr->kind()) == excludedKind.end()) { - return true; - } - return false; - }; - FindVisitor visitor(finder, true); - const_cast(self)->accept(&visitor); - return std::move(visitor).results(); -} - std::vector ExpressionUtils::findAllStorage(const Expression *expr) { return collectAll(expr, {Expression::Kind::kTagProperty, @@ -216,15 +200,29 @@ Expression *ExpressionUtils::rewriteLogicalAndToLogicalOr(const Expression *expr DCHECK(expr->kind() == Expression::Kind::kLogicalAnd); auto pool = expr->getObjPool(); auto logicalAndExpr = static_cast(expr->clone()); + auto logicalAndExprSize = (logicalAndExpr->operands()).size(); // Extract all OR expr auto orExprList = collectAll(logicalAndExpr, {Expression::Kind::kLogicalOr}); - auto nonOrExprList = collectAllExcept(logicalAndExpr, {Expression::Kind::kLogicalOr}); + auto orExprListSize = orExprList.size(); + + // Extract all non-OR expr + std::vector nonOrExprList; + bool isAllRelOr = logicalAndExprSize == orExprListSize; + + // If logical expression has operand that is not an OR expr, add into nonOrExprList + if (!isAllRelOr) { + nonOrExprList.reserve(logicalAndExprSize - orExprListSize); + for (const auto &operand : logicalAndExpr->operands()) { + if (operand->kind() != Expression::Kind::kLogicalOr) { + nonOrExprList.emplace_back(std::move(operand)); + } + } + } - DCHECK_GT(orExprList.size(), 1); - std::vector> orExprOperands; - orExprOperands.reserve(orExprList.size()); - // orExprOperands.emplace_back(std::move(orExprList[0])); + DCHECK_GT(orExprListSize, 0); + std::vector> orExprOperands{{}}; + orExprOperands.reserve(orExprListSize); // Merge the elements of vec2 into each subVec of vec1 // [[A], [B]] and [C, D] => [[A, C], [A, D], [B, C], [B,D]] @@ -241,21 +239,24 @@ Expression *ExpressionUtils::rewriteLogicalAndToLogicalOr(const Expression *expr return res; }; - // Iterate all OR exprs + // Iterate all OR exprs and construct the operand list for (auto curExpr : orExprList) { auto curLogicalOrExpr = static_cast(const_cast(curExpr)); - // auto tempExpr = LogicalExpression::makeAnd(pool); - auto curOrOperands = curLogicalOrExpr->operands(); + orExprOperands = mergeVecs(orExprOperands, curOrOperands); } - // orExprOperands is a 2D vecter where each sub-vecter is the operands of AND expression. + // orExprOperands is a 2D vector where each sub-vector is the operands of AND expression. // [[A, C], [A, D], [B, C], [B,D]] => (A and C) or (A and D) or (B and C) or (B or D) std::vector andExprList; andExprList.reserve(orExprOperands.size()); - for (const auto &operand : orExprOperands) { + for (auto &operand : orExprOperands) { auto andExpr = LogicalExpression::makeAnd(pool); + // if nonOrExprList is not empty, append it to operand + if (!isAllRelOr) { + operand.insert(operand.end(), nonOrExprList.begin(), nonOrExprList.end()); + } andExpr->setOperands(operand); andExprList.emplace_back(std::move(andExpr)); } diff --git a/src/graph/util/ExpressionUtils.h b/src/graph/util/ExpressionUtils.h index 94c9aaf9dc0..a4faeaf5777 100644 --- a/src/graph/util/ExpressionUtils.h +++ b/src/graph/util/ExpressionUtils.h @@ -45,9 +45,6 @@ class ExpressionUtils { static std::vector collectAll( const Expression* self, const std::unordered_set& expected); - static std::vector collectAllExcept( - const Expression* self, const std::unordered_set& excludedKind); - static std::vector findAllStorage(const Expression* expr); static std::vector findAllInputVariableProp(const Expression* expr); diff --git a/src/graph/visitor/FindVisitor.cpp b/src/graph/visitor/FindVisitor.cpp index ec0d5505975..5f8142a5003 100644 --- a/src/graph/visitor/FindVisitor.cpp +++ b/src/graph/visitor/FindVisitor.cpp @@ -121,6 +121,15 @@ void FindVisitor::visit(ListComprehensionExpression* expr) { } } +void FindVisitor::visit(LogicalExpression* expr) { + findInCurrentExpr(expr); + if (!needFindAll_ && !foundExprs_.empty()) return; + for (const auto& operand : expr->operands()) { + operand->accept(this); + if (!needFindAll_ && !foundExprs_.empty()) return; + } +} + void FindVisitor::visit(ConstantExpression* expr) { findInCurrentExpr(expr); } void FindVisitor::visit(EdgePropertyExpression* expr) { findInCurrentExpr(expr); } diff --git a/src/graph/visitor/FindVisitor.h b/src/graph/visitor/FindVisitor.h index 64936264416..3553c551734 100644 --- a/src/graph/visitor/FindVisitor.h +++ b/src/graph/visitor/FindVisitor.h @@ -66,6 +66,7 @@ class FindVisitor final : public ExprVisitorImpl { void visit(ColumnExpression* expr) override; void visit(ListComprehensionExpression* expr) override; void visit(SubscriptRangeExpression* expr) override; + void visit(LogicalExpression* expr) override; void visitBinaryExpr(BinaryExpression* expr) override; void findInCurrentExpr(Expression* expr); diff --git a/tests/tck/features/lookup/TagIndexFullScan.feature b/tests/tck/features/lookup/TagIndexFullScan.feature index 9e6583aaf47..ecbbbd30817 100644 --- a/tests/tck/features/lookup/TagIndexFullScan.feature +++ b/tests/tck/features/lookup/TagIndexFullScan.feature @@ -93,7 +93,6 @@ Feature: Lookup tag index full scan | 0 | Start | | | # TODO: Support compare operator info that has multiple column hints - @aiee Scenario: Tag with relational IN/NOT IN filter When profiling query: """ @@ -170,11 +169,11 @@ Feature: Lookup tag index full scan | VertexID | player.age | | "Kobe Bryant" | 40 | And the execution plan should be: - | id | name | dependencies | operator info | - | 3 | Project | 4 | | - | 4 | TagIndexPrefixScan | 0 | {"indexCtx": {"columnHints":{"endValue":"__EMPTY__","beginValue":"\"Kobe Bryant","scanType":"PREFIX","column":"name"}}} | - | 0 | Start | | | - # a IN b AND c IN d (TagIndexFullScan) + | id | name | dependencies | operator info | + | 3 | Project | 4 | | + | 4 | IndexScan | 0 | | + | 0 | Start | | | + # a IN b AND c IN d When profiling query: """ LOOKUP ON player WHERE player.age IN [40, 25] AND player.name IN ["ABC", "Kobe Bryant"] YIELD player.age @@ -183,115 +182,114 @@ Feature: Lookup tag index full scan | VertexID | player.age | | "Kobe Bryant" | 40 | And the execution plan should be: - | id | name | dependencies | operator info | - | 3 | Project | 2 | | - | 2 | Filter | 4 | {"condition": "(((player.age==40) OR (player.age==25)) AND ((player.name==\"ABC\") OR (player.name==\"Kobe Bryant\")))"} | - | 4 | TagIndexFullScan | 0 | | - | 0 | Start | | | - # When profiling query: - # """ - # LOOKUP ON team WHERE team.name NOT IN ["Hornets", "Jazz"] - # """ - # Then the result should be, in any order: - # | VertexID | - # | "76ers" | - # | "Bucks" | - # | "Bulls" | - # | "Cavaliers" | - # | "Celtics" | - # | "Clippers" | - # | "Grizzlies" | - # | "Hawks" | - # | "Heat" | - # | "Wizards" | - # | "Warriors" | - # | "Kings" | - # | "Knicks" | - # | "Lakers" | - # | "Magic" | - # | "Mavericks" | - # | "Nets" | - # | "Nuggets" | - # | "Pacers" | - # | "Pelicans" | - # | "Pistons" | - # | "Raptors" | - # | "Rockets" | - # | "Spurs" | - # | "Suns" | - # | "Thunders" | - # | "Timberwolves" | - # | "Trail Blazers" | - # And the execution plan should be: - # | id | name | dependencies | operator info | - # | 3 | Project | 2 | | - # | 2 | Filter | 4 | {"condition": "(team.name NOT IN [\"Hornets\",\"Jazz\"])"} | - # | 4 | TagIndexFullScan | 0 | | - # | 0 | Start | | | - # When profiling query: - # """ - # LOOKUP ON player WHERE player.age NOT IN [40 - 1 , 72/2] YIELD player.age - # """ - # Then the result should be, in any order: - # | VertexID | player.age | - # | "Yao Ming" | 38 | - # | "Aron Baynes" | 32 | - # | "Ben Simmons" | 22 | - # | "Blake Griffin" | 30 | - # | "Vince Carter" | 42 | - # | "Carmelo Anthony" | 34 | - # | "Chris Paul" | 33 | - # | "Cory Joseph" | 27 | - # | "Damian Lillard" | 28 | - # | "Danny Green" | 31 | - # | "David West" | 38 | - # | "DeAndre Jordan" | 30 | - # | "Dejounte Murray" | 29 | - # | "Dirk Nowitzki" | 40 | - # | "Dwight Howard" | 33 | - # | "Dwyane Wade" | 37 | - # | "Giannis Antetokounmpo" | 24 | - # | "Grant Hill" | 46 | - # | "JaVale McGee" | 31 | - # | "James Harden" | 29 | - # | "Jason Kidd" | 45 | - # | "Joel Embiid" | 25 | - # | "Jonathon Simmons" | 29 | - # | "Kevin Durant" | 30 | - # | "Klay Thompson" | 29 | - # | "Kobe Bryant" | 40 | - # | "Kristaps Porzingis" | 23 | - # | "Kyle Anderson" | 25 | - # | "Kyrie Irving" | 26 | - # | "LaMarcus Aldridge" | 33 | - # | "LeBron James" | 34 | - # | "Luka Doncic" | 20 | - # | "Manu Ginobili" | 41 | - # | "Marc Gasol" | 34 | - # | "Marco Belinelli" | 32 | - # | "Nobody" | 0 | - # | "Null1" | -1 | - # | "Null2" | -2 | - # | "Null3" | -3 | - # | "Null4" | -4 | - # | "Paul Gasol" | 38 | - # | "Paul George" | 28 | - # | "Rajon Rondo" | 33 | - # | "Ray Allen" | 43 | - # | "Ricky Rubio" | 28 | - # | "Rudy Gay" | 32 | - # | "Russell Westbrook" | 30 | - # | "Shaquile O'Neal" | 47 | - # | "Stephen Curry" | 31 | - # | "Steve Nash" | 45 | - # | "Tiago Splitter" | 34 | - # | "Tim Duncan" | 42 | - # And the execution plan should be: - # | id | name | dependencies | operator info | - # | 3 | Project | 2 | | - # | 2 | Filter | 4 | {"condition": "(player.age NOT IN [39,36])"} | - # | 4 | TagIndexFullScan | 0 | | - # | 0 | Start | | | + | id | name | dependencies | operator info | + | 3 | Project | 4 | | + | 4 | IndexScan | 0 | | + | 0 | Start | | | + When profiling query: + """ + LOOKUP ON team WHERE team.name NOT IN ["Hornets", "Jazz"] + """ + Then the result should be, in any order: + | VertexID | + | "76ers" | + | "Bucks" | + | "Bulls" | + | "Cavaliers" | + | "Celtics" | + | "Clippers" | + | "Grizzlies" | + | "Hawks" | + | "Heat" | + | "Wizards" | + | "Warriors" | + | "Kings" | + | "Knicks" | + | "Lakers" | + | "Magic" | + | "Mavericks" | + | "Nets" | + | "Nuggets" | + | "Pacers" | + | "Pelicans" | + | "Pistons" | + | "Raptors" | + | "Rockets" | + | "Spurs" | + | "Suns" | + | "Thunders" | + | "Timberwolves" | + | "Trail Blazers" | + And the execution plan should be: + | id | name | dependencies | operator info | + | 3 | Project | 2 | | + | 2 | Filter | 4 | {"condition": "(team.name NOT IN [\"Hornets\",\"Jazz\"])"} | + | 4 | TagIndexFullScan | 0 | | + | 0 | Start | | | + When profiling query: + """ + LOOKUP ON player WHERE player.age NOT IN [40 - 1 , 72/2] YIELD player.age + """ + Then the result should be, in any order: + | VertexID | player.age | + | "Yao Ming" | 38 | + | "Aron Baynes" | 32 | + | "Ben Simmons" | 22 | + | "Blake Griffin" | 30 | + | "Vince Carter" | 42 | + | "Carmelo Anthony" | 34 | + | "Chris Paul" | 33 | + | "Cory Joseph" | 27 | + | "Damian Lillard" | 28 | + | "Danny Green" | 31 | + | "David West" | 38 | + | "DeAndre Jordan" | 30 | + | "Dejounte Murray" | 29 | + | "Dirk Nowitzki" | 40 | + | "Dwight Howard" | 33 | + | "Dwyane Wade" | 37 | + | "Giannis Antetokounmpo" | 24 | + | "Grant Hill" | 46 | + | "JaVale McGee" | 31 | + | "James Harden" | 29 | + | "Jason Kidd" | 45 | + | "Joel Embiid" | 25 | + | "Jonathon Simmons" | 29 | + | "Kevin Durant" | 30 | + | "Klay Thompson" | 29 | + | "Kobe Bryant" | 40 | + | "Kristaps Porzingis" | 23 | + | "Kyle Anderson" | 25 | + | "Kyrie Irving" | 26 | + | "LaMarcus Aldridge" | 33 | + | "LeBron James" | 34 | + | "Luka Doncic" | 20 | + | "Manu Ginobili" | 41 | + | "Marc Gasol" | 34 | + | "Marco Belinelli" | 32 | + | "Nobody" | 0 | + | "Null1" | -1 | + | "Null2" | -2 | + | "Null3" | -3 | + | "Null4" | -4 | + | "Paul Gasol" | 38 | + | "Paul George" | 28 | + | "Rajon Rondo" | 33 | + | "Ray Allen" | 43 | + | "Ricky Rubio" | 28 | + | "Rudy Gay" | 32 | + | "Russell Westbrook" | 30 | + | "Shaquile O'Neal" | 47 | + | "Stephen Curry" | 31 | + | "Steve Nash" | 45 | + | "Tiago Splitter" | 34 | + | "Tim Duncan" | 42 | + And the execution plan should be: + | id | name | dependencies | operator info | + | 3 | Project | 2 | | + | 2 | Filter | 4 | {"condition": "(player.age NOT IN [39,36])"} | + | 4 | TagIndexFullScan | 0 | | + | 0 | Start | | | Scenario: Tag with relational CONTAINS/NOT CONTAINS filter When profiling query: From 9d26461ab28601c93cc3fe64f4c8152f190385a8 Mon Sep 17 00:00:00 2001 From: Aiee <18348405+Aiee@users.noreply.github.com> Date: Wed, 22 Sep 2021 16:22:48 +0800 Subject: [PATCH 4/8] Fix tests Fix compilation --- src/graph/optimizer/OptimizerUtils.cpp | 8 ++++++-- .../rule/OptimizeTagIndexScanByFilterRule.cpp | 2 +- src/graph/visitor/FoldConstantExprVisitor.cpp | 2 +- .../test/FoldConstantExprVisitorTest.cpp | 4 ++-- .../features/lookup/EdgeIndexFullScan.feature | 19 +++++++++---------- tests/tck/features/lookup/LookupEdge2.feature | 10 +++++----- tests/tck/features/lookup/LookupTag2.feature | 10 +++++----- 7 files changed, 29 insertions(+), 26 deletions(-) diff --git a/src/graph/optimizer/OptimizerUtils.cpp b/src/graph/optimizer/OptimizerUtils.cpp index fb01ccbf519..764f109f8b6 100644 --- a/src/graph/optimizer/OptimizerUtils.cpp +++ b/src/graph/optimizer/OptimizerUtils.cpp @@ -642,8 +642,12 @@ StatusOr selectRelExprIndex(const ColumnDef& field, } auto right = expr->right(); - expr->kind() == Expression::Kind::kRelIn ? DCHECK(right->isContainerExpr()) - : DCHECK(right->kind() == Expression::Kind::kConstant); + if (expr->kind() == Expression::Kind::kRelIn) { // container expressions + DCHECK(right->isContainerExpr()); + } else { // other expressions + DCHECK(right->kind() == Expression::Kind::kConstant); + } + const auto& value = static_cast(right)->value(); ScoredColumnHint hint; diff --git a/src/graph/optimizer/rule/OptimizeTagIndexScanByFilterRule.cpp b/src/graph/optimizer/rule/OptimizeTagIndexScanByFilterRule.cpp index 0f4edab4a3d..77e75735485 100644 --- a/src/graph/optimizer/rule/OptimizeTagIndexScanByFilterRule.cpp +++ b/src/graph/optimizer/rule/OptimizeTagIndexScanByFilterRule.cpp @@ -53,7 +53,7 @@ const Pattern& OptimizeTagIndexScanByFilterRule::pattern() const { // It the list has more than 1 element, the expr will be matched with UnionAllIndexScanBaseRule. // // 2. Logical AND expr. If the AND expr contains an operand that is an IN expr, the label attribute -// in the IN expr SHUOLD NOT have a valid index, otherwise the expression should be matched with +// in the IN expr SHOULD NOT have a valid index, otherwise the expression should be matched with // UnionAllIndexScanBaseRule. bool OptimizeTagIndexScanByFilterRule::match(OptContext* ctx, const MatchedResult& matched) const { if (!OptRule::match(ctx, matched)) { diff --git a/src/graph/visitor/FoldConstantExprVisitor.cpp b/src/graph/visitor/FoldConstantExprVisitor.cpp index 47c4af9c533..a075d7eef6b 100644 --- a/src/graph/visitor/FoldConstantExprVisitor.cpp +++ b/src/graph/visitor/FoldConstantExprVisitor.cpp @@ -338,7 +338,7 @@ void FoldConstantExprVisitor::visitBinaryExpr(BinaryExpression *expr) { } Expression *FoldConstantExprVisitor::fold(Expression *expr) { - // Container expresison shuold remain the same type after being folded + // Container expresison should remain the same type after being folded if (expr->isContainerExpr()) { return expr; } diff --git a/src/graph/visitor/test/FoldConstantExprVisitorTest.cpp b/src/graph/visitor/test/FoldConstantExprVisitorTest.cpp index 43d4dd0399c..90724f2cf95 100644 --- a/src/graph/visitor/test/FoldConstantExprVisitorTest.cpp +++ b/src/graph/visitor/test/FoldConstantExprVisitorTest.cpp @@ -119,7 +119,7 @@ TEST_F(FoldConstantExprVisitorTest, TestSetExpr) { ASSERT_EQ(*expr, *expected) << expr->toString() << " vs. " << expected->toString(); ASSERT(visitor.canBeFolded()); // type should remain the same after folding - ASSERT_EQ(expr->kind(), Expression::Kind::kList); + ASSERT_EQ(expr->kind(), Expression::Kind::kSet); } TEST_F(FoldConstantExprVisitorTest, TestMapExpr) { @@ -136,7 +136,7 @@ TEST_F(FoldConstantExprVisitorTest, TestMapExpr) { ASSERT_EQ(*expr, *expected) << expr->toString() << " vs. " << expected->toString(); ASSERT(visitor.canBeFolded()); // type should remain the same after folding - ASSERT_EQ(expr->kind(), Expression::Kind::kList); + ASSERT_EQ(expr->kind(), Expression::Kind::kMap); } TEST_F(FoldConstantExprVisitorTest, TestCaseExpr) { diff --git a/tests/tck/features/lookup/EdgeIndexFullScan.feature b/tests/tck/features/lookup/EdgeIndexFullScan.feature index d66094cb7f7..bf89a32e5c2 100644 --- a/tests/tck/features/lookup/EdgeIndexFullScan.feature +++ b/tests/tck/features/lookup/EdgeIndexFullScan.feature @@ -202,11 +202,11 @@ Feature: Lookup edge index full scan | SrcVID | DstVID | Ranking | edge_1.col1_str | edge_1.col2_int | | "101" | "102" | 0 | "Red1" | 11 | And the execution plan should be: - | id | name | dependencies | operator info | - | 3 | Project | 4 | | - | 4 | EdgeIndexPrefixScan | 0 | | - | 0 | Start | | | - # a IN b AND c IN d (EdgeIndexFullScan) + | id | name | dependencies | operator info | + | 3 | Project | 4 | | + | 4 | IndexScan | 0 | | + | 0 | Start | | | + # a IN b AND c IN d (4 prefixScan will be executed) When profiling query: """ LOOKUP ON edge_1 @@ -217,11 +217,10 @@ Feature: Lookup edge index full scan | SrcVID | DstVID | Ranking | edge_1.col1_str | edge_1.col2_int | | "101" | "102" | 0 | "Red1" | 11 | And the execution plan should be: - | id | name | dependencies | operator info | - | 3 | Project | 2 | | - | 2 | Filter | 4 | {"condition": "(((edge_1.col2_int==11) OR (edge_1.col2_int==33)) AND ((edge_1.col1_str==\"Red1\") OR (edge_1.col1_str==\"ABC\")))"} | - | 4 | EdgeIndexFullScan | 0 | | - | 0 | Start | | | + | id | name | dependencies | operator info | + | 3 | Project | 4 | | + | 4 | IndexScan | 0 | | + | 0 | Start | | | When profiling query: """ LOOKUP ON edge_1 WHERE edge_1.col1_str NOT IN ["Blue"] YIELD edge_1.col1_str diff --git a/tests/tck/features/lookup/LookupEdge2.feature b/tests/tck/features/lookup/LookupEdge2.feature index 1e7f6a536ed..aeac34348f3 100644 --- a/tests/tck/features/lookup/LookupEdge2.feature +++ b/tests/tck/features/lookup/LookupEdge2.feature @@ -27,6 +27,11 @@ Feature: Test lookup on edge index 2 """ Scenario Outline: [edge] Simple test cases + When executing query: + """ + LOOKUP ON lookup_edge_1 WHERE lookup_edge_1.col1 == 201 OR lookup_edge_1.col2 == 201 AND lookup_edge_1.col3 == 202 + """ + Then the execution should be successful When executing query: """ LOOKUP ON lookup_edge_1 WHERE col1 == 201 @@ -37,11 +42,6 @@ Feature: Test lookup on edge index 2 LOOKUP ON lookup_edge_1 WHERE lookup_edge_1.col1 == 201 OR lookup_edge_1.col5 == 201 """ Then a SemanticError should be raised at runtime: Invalid column: col5 - When executing query: - """ - LOOKUP ON lookup_edge_1 WHERE lookup_edge_1.col1 == 201 OR lookup_edge_1.col2 == 201 AND lookup_edge_1.col3 == 202 - """ - Then a SemanticError should be raised at runtime: Not supported filter When executing query: """ LOOKUP ON lookup_edge_1 WHERE lookup_edge_1.col1 == 300 diff --git a/tests/tck/features/lookup/LookupTag2.feature b/tests/tck/features/lookup/LookupTag2.feature index 46b594b0aa1..50616a8a4ef 100644 --- a/tests/tck/features/lookup/LookupTag2.feature +++ b/tests/tck/features/lookup/LookupTag2.feature @@ -28,6 +28,11 @@ Feature: Test lookup on tag index 2 """ Scenario Outline: [tag] simple tag test cases + When executing query: + """ + LOOKUP ON lookup_tag_1 WHERE lookup_tag_1.col1 == 201 OR lookup_tag_1.col2 == 201 AND lookup_tag_1.col3 == 202 + """ + Then the execution should be successful When executing query: """ LOOKUP ON lookup_tag_1 WHERE col1 == 200; @@ -38,11 +43,6 @@ Feature: Test lookup on tag index 2 LOOKUP ON lookup_tag_1 WHERE lookup_tag_1.col1 == 200 OR lookup_tag_1.col5 == 20; """ Then a SemanticError should be raised at runtime: Invalid column: col5 - When executing query: - """ - LOOKUP ON lookup_tag_1 WHERE lookup_tag_1.col1 == 201 OR lookup_tag_1.col2 == 201 AND lookup_tag_1.col3 == 202 - """ - Then a SemanticError should be raised at runtime: Not supported filter When executing query: """ LOOKUP ON lookup_tag_1 WHERE lookup_tag_1.col1 == 300 From 0d18f1357f58e4445db3ec34f2b525f5c8f82b80 Mon Sep 17 00:00:00 2001 From: Aiee <18348405+Aiee@users.noreply.github.com> Date: Wed, 22 Sep 2021 22:05:14 +0800 Subject: [PATCH 5/8] Forbid using string-related relational expressions as the filter of LOOKUP except STARTS/NOT STARTS WITH --- src/graph/validator/LookupValidator.cpp | 50 +++--- .../features/lookup/EdgeIndexFullScan.feature | 106 +----------- .../features/lookup/TagIndexFullScan.feature | 160 +----------------- 3 files changed, 46 insertions(+), 270 deletions(-) diff --git a/src/graph/validator/LookupValidator.cpp b/src/graph/validator/LookupValidator.cpp index cc2eeef06f7..023bf2a32bc 100644 --- a/src/graph/validator/LookupValidator.cpp +++ b/src/graph/validator/LookupValidator.cpp @@ -21,6 +21,8 @@ using nebula::meta::NebulaSchemaProvider; using std::shared_ptr; using std::unique_ptr; +using ExprKind = nebula::Expression::Kind; + namespace nebula { namespace graph { @@ -186,6 +188,7 @@ Status LookupValidator::validateYield() { return Status::OK(); } lookupCtx_->dedup = yieldClause->isDistinct(); + if (lookupCtx_->isEdge) { NG_RETURN_IF_ERROR(validateYieldEdge()); } else { @@ -249,16 +252,25 @@ StatusOr LookupValidator::handleLogicalExprOperands(LogicalExpressi StatusOr LookupValidator::checkFilter(Expression* expr) { // TODO: Support IN expression push down if (expr->isRelExpr()) { + // Only starts with can be pushed down as a range scan, so forbid other string-related relExpr + if (expr->kind() == ExprKind::kRelREG || expr->kind() == ExprKind::kContains || + expr->kind() == ExprKind::kNotContains || expr->kind() == ExprKind::kEndsWith || + expr->kind() == ExprKind::kNotEndsWith) { + return Status::SemanticError( + "Expression %s is not supported, please use full-text index as an optimal solution", + expr->toString().c_str()); + } + auto relExpr = static_cast(expr); NG_RETURN_IF_ERROR(checkRelExpr(relExpr)); return rewriteRelExpr(relExpr); } switch (expr->kind()) { - case Expression::Kind::kLogicalOr: { + case ExprKind::kLogicalOr: { ExpressionUtils::pullOrs(expr); return handleLogicalExprOperands(static_cast(expr)); } - case Expression::Kind::kLogicalAnd: { + case ExprKind::kLogicalAnd: { ExpressionUtils::pullAnds(expr); return handleLogicalExprOperands(static_cast(expr)); } @@ -272,12 +284,10 @@ Status LookupValidator::checkRelExpr(RelationalExpression* expr) { auto* left = expr->left(); auto* right = expr->right(); // Does not support filter : schema.col1 > schema.col2 - if (left->kind() == Expression::Kind::kLabelAttribute && - right->kind() == Expression::Kind::kLabelAttribute) { + if (left->kind() == ExprKind::kLabelAttribute && right->kind() == ExprKind::kLabelAttribute) { return Status::SemanticError("Expression %s not supported yet", expr->toString().c_str()); } - if (left->kind() == Expression::Kind::kLabelAttribute || - right->kind() == Expression::Kind::kLabelAttribute) { + if (left->kind() == ExprKind::kLabelAttribute || right->kind() == ExprKind::kLabelAttribute) { return Status::OK(); } return Status::SemanticError("Expression %s not supported yet", expr->toString().c_str()); @@ -287,7 +297,7 @@ StatusOr LookupValidator::rewriteRelExpr(RelationalExpression* expr // swap LHS and RHS of relExpr if LabelAttributeExpr in on the right, // so that LabelAttributeExpr is always on the left auto rightOperand = expr->right(); - if (rightOperand->kind() == Expression::Kind::kLabelAttribute) { + if (rightOperand->kind() == ExprKind::kLabelAttribute) { expr = static_cast(reverseRelKind(expr)); } @@ -301,7 +311,7 @@ StatusOr LookupValidator::rewriteRelExpr(RelationalExpression* expr auto foldRes = ExpressionUtils::foldConstantExpr(expr); NG_RETURN_IF_ERROR(foldRes); expr = static_cast(foldRes.value()); - DCHECK_EQ(expr->left()->kind(), Expression::Kind::kLabelAttribute); + DCHECK_EQ(expr->left()->kind(), ExprKind::kLabelAttribute); // Check schema and value type std::string prop = la->right()->value().getStr(); @@ -319,7 +329,7 @@ StatusOr LookupValidator::rewriteRelExpr(RelationalExpression* expr StatusOr LookupValidator::checkConstExpr(Expression* expr, const std::string& prop, - const Expression::Kind kind) { + const ExprKind kind) { auto* pool = expr->getObjPool(); if (!evaluableExpr(expr)) { return Status::SemanticError("'%s' is not an evaluable expression.", expr->toString().c_str()); @@ -344,7 +354,7 @@ StatusOr LookupValidator::checkConstExpr(Expression* expr, double f = v.getFloat(); int iCeil = ceil(f); int iFloor = floor(f); - if (kind == Expression::Kind::kRelGE || kind == Expression::Kind::kRelLT) { + if (kind == ExprKind::kRelGE || kind == ExprKind::kRelLT) { // edge case col1 >= 40.0, no need to round up if (std::abs(f - iCeil) < kEpsilon) { return ConstantExpression::make(pool, iFloor); @@ -391,21 +401,21 @@ Expression* LookupValidator::reverseRelKind(RelationalExpression* expr) { auto reversedKind = kind; switch (kind) { - case Expression::Kind::kRelEQ: + case ExprKind::kRelEQ: break; - case Expression::Kind::kRelNE: + case ExprKind::kRelNE: break; - case Expression::Kind::kRelLT: - reversedKind = Expression::Kind::kRelGT; + case ExprKind::kRelLT: + reversedKind = ExprKind::kRelGT; break; - case Expression::Kind::kRelLE: - reversedKind = Expression::Kind::kRelGE; + case ExprKind::kRelLE: + reversedKind = ExprKind::kRelGE; break; - case Expression::Kind::kRelGT: - reversedKind = Expression::Kind::kRelLT; + case ExprKind::kRelGT: + reversedKind = ExprKind::kRelLT; break; - case Expression::Kind::kRelGE: - reversedKind = Expression::Kind::kRelLE; + case ExprKind::kRelGE: + reversedKind = ExprKind::kRelLE; break; default: LOG(FATAL) << "Invalid relational expression kind: " << static_cast(kind); diff --git a/tests/tck/features/lookup/EdgeIndexFullScan.feature b/tests/tck/features/lookup/EdgeIndexFullScan.feature index bf89a32e5c2..a2e6b49ea35 100644 --- a/tests/tck/features/lookup/EdgeIndexFullScan.feature +++ b/tests/tck/features/lookup/EdgeIndexFullScan.feature @@ -33,51 +33,12 @@ Feature: Lookup edge index full scan '103'->'101':('Blue', 33); """ - Scenario: Edge with relational RegExp filter[1] + Scenario: Edge with relational RegExp filter When executing query: """ LOOKUP ON edge_1 WHERE edge_1.col1_str =~ "\\w+\\d+" YIELD edge_1.col1_str """ - Then the result should be, in any order: - | SrcVID | DstVID | Ranking | edge_1.col1_str | - | "101" | "102" | 0 | "Red1" | - When executing query: - """ - LOOKUP ON edge_1 WHERE edge_1.col1_str =~ "\\w+ll\\w+" YIELD edge_1.col1_str - """ - Then the result should be, in any order: - | SrcVID | DstVID | Ranking | edge_1.col1_str | - | "102" | "103" | 0 | "Yellow" | - - # skip because `make fmt` will delete '\' in the operator info and causes tests fail - @skip - Scenario: Edge with relational RegExp filter[2] - When profiling query: - """ - LOOKUP ON edge_1 where edge_1.col1_str =~ "\\d+\\w+" YIELD edge_1.col1_str - """ - Then the result should be, in any order: - | SrcVID | DstVID | Ranking | edge_1.col1_str | - | "101" | "102" | 0 | "Red1" | - And the execution plan should be: - | id | name | dependencies | operator info | - | 3 | Project | 2 | | - | 2 | Filter | 4 | {"condition": "(edge_1.col1_str=~\"\w+\d+\")"} | - | 4 | EdgeIndexFullScan | 0 | | - | 0 | Start | | | - When profiling query: - """ - LOOKUP ON edge_1 where edge_1.col1_str =~ "\\w+ea\\w+" YIELD edge_1.col1_str - """ - Then the result should be, in any order: - | SrcVID | DstVID | Ranking | edge_1.col1_str | - | "102" | "103" | 0 | "Yellow" | - And the execution plan should be: - | id | name | dependencies | operator info | - | 3 | Project | 2 | | - | 2 | Filter | 4 | {"condition": "(edge_1.col1_str=~\"\w+ea\w+\")"} | - | 4 | EdgeIndexFullScan | 0 | | - | 0 | Start | | | + Then a SemanticError should be raised at runtime: Expression (edge_1.col1_str=~"\w+\d+") is not supported, please use full-text index as an optimal solution Scenario: Edge with relational NE filter When profiling query: @@ -250,39 +211,16 @@ Feature: Lookup edge index full scan | 0 | Start | | | Scenario: Edge with relational CONTAINS/NOT CONTAINS filter - When profiling query: - """ - LOOKUP ON edge_1 WHERE edge_1.col1_str CONTAINS toLower("L") YIELD edge_1.col1_str - """ - Then the result should be, in any order: - | SrcVID | DstVID | Ranking | edge_1.col1_str | - | "103" | "101" | 0 | "Blue" | - | "102" | "103" | 0 | "Yellow" | - And the execution plan should be: - | id | name | dependencies | operator info | - | 3 | Project | 2 | | - | 2 | Filter | 4 | {"condition": "(edge_1.col1_str CONTAINS \"l\")"} | - | 4 | EdgeIndexFullScan | 0 | | - | 0 | Start | | | When executing query: """ LOOKUP ON edge_1 WHERE edge_1.col1_str CONTAINS "ABC" YIELD edge_1.col1_str """ - Then the result should be, in any order: - | SrcVID | DstVID | Ranking | edge_1.col1_str | - When profiling query: + Then a SemanticError should be raised at runtime: Expression (edge_1.col1_str CONTAINS "ABC") is not supported, please use full-text index as an optimal solution + When executing query: """ - LOOKUP ON edge_1 WHERE edge_1.col1_str NOT CONTAINS toLower("L") YIELD edge_1.col1_str + LOOKUP ON edge_1 WHERE edge_1.col1_str NOT CONTAINS "ABC" YIELD edge_1.col1_str """ - Then the result should be, in any order: - | SrcVID | DstVID | Ranking | edge_1.col1_str | - | "101" | "102" | 0 | "Red1" | - And the execution plan should be: - | id | name | dependencies | operator info | - | 3 | Project | 2 | | - | 2 | Filter | 4 | {"condition": "(edge_1.col1_str NOT CONTAINS \"l\")"} | - | 4 | EdgeIndexFullScan | 0 | | - | 0 | Start | | | + Then a SemanticError should be raised at runtime: Expression (edge_1.col1_str NOT CONTAINS "ABC") is not supported, please use full-text index as an optimal solution Scenario: Edge with relational STARTS/NOT STARTS WITH filter When profiling query: @@ -325,41 +263,13 @@ Feature: Lookup edge index full scan | 0 | Start | | | Scenario: Edge with relational ENDS/NOT ENDS WITH filter - When profiling query: - """ - LOOKUP ON edge_1 WHERE edge_1.col1_str ENDS WITH toLower("E") YIELD edge_1.col1_str - """ - Then the result should be, in any order: - | SrcVID | DstVID | Ranking | edge_1.col1_str | - | "103" | "101" | 0 | "Blue" | - And the execution plan should be: - | id | name | dependencies | operator info | - | 3 | Project | 2 | | - | 2 | Filter | 4 | {"condition": "(edge_1.col1_str ENDS WITH \"e\")"} | - | 4 | EdgeIndexFullScan | 0 | | - | 0 | Start | | | When executing query: """ LOOKUP ON edge_1 WHERE edge_1.col1_str ENDS WITH "ABC" YIELD edge_1.col1_str """ - Then the result should be, in any order: - | SrcVID | DstVID | Ranking | edge_1.col1_str | + Then a SemanticError should be raised at runtime: Expression (edge_1.col1_str ENDS WITH "ABC") is not supported, please use full-text index as an optimal solution When executing query: - """ - LOOKUP ON edge_1 WHERE edge_1.col1_str ENDS WITH 123 YIELD edge_1.col1_str - """ - Then a SemanticError should be raised at runtime: Column type error : col1_str - When profiling query: """ LOOKUP ON edge_1 WHERE edge_1.col1_str NOT ENDS WITH toLower("E") YIELD edge_1.col1_str """ - Then the result should be, in any order: - | SrcVID | DstVID | Ranking | edge_1.col1_str | - | "101" | "102" | 0 | "Red1" | - | "102" | "103" | 0 | "Yellow" | - And the execution plan should be: - | id | name | dependencies | operator info | - | 3 | Project | 2 | | - | 2 | Filter | 4 | {"condition": "(edge_1.col1_str NOT ENDS WITH \"e\")"} | - | 4 | EdgeIndexFullScan | 0 | | - | 0 | Start | | | + Then a SemanticError should be raised at runtime: Expression (edge_1.col1_str NOT ENDS WITH toLower("E")) is not supported, please use full-text index as an optimal solution diff --git a/tests/tck/features/lookup/TagIndexFullScan.feature b/tests/tck/features/lookup/TagIndexFullScan.feature index ecbbbd30817..e3b76329de3 100644 --- a/tests/tck/features/lookup/TagIndexFullScan.feature +++ b/tests/tck/features/lookup/TagIndexFullScan.feature @@ -8,46 +8,7 @@ Feature: Lookup tag index full scan """ LOOKUP ON team where team.name =~ "\\d+\\w+" """ - Then the result should be, in any order: - | VertexID | - | "76ers" | - When executing query: - """ - LOOKUP ON team where team.name =~ "\\w+ea\\w+" - """ - Then the result should be, in any order: - | VertexID | - | "Heat" | - - # skip because `make fmt` will delete '\' in the operator info and causes tests fail - @skip - Scenario: Tag with relational RegExp filter[2] - When profiling query: - """ - LOOKUP ON team where team.name =~ "\\d+\\w+" - """ - Then the result should be, in any order: - | VertexID | - | "76ers" | - And the execution plan should be: - | id | name | dependencies | operator info | - | 3 | Project | 2 | | - | 2 | Filter | 4 | {"condition": "(team.name=~\"\d+\w+\")"} | - | 4 | TagIndexFullScan | 0 | | - | 0 | Start | | | - When profiling query: - """ - LOOKUP ON team where team.name =~ "\\w+ea\\w+" - """ - Then the result should be, in any order: - | VertexID | - | "Heat" | - And the execution plan should be: - | id | name | dependencies | operator info | - | 3 | Project | 2 | | - | 2 | Filter | 4 | {"condition": "(team.name=~\"\w+ea\w+\")"} | - | 4 | TagIndexFullScan | 0 | | - | 0 | Start | | | + Then a SemanticError should be raised at runtime: Expression (team.name=~"\d+\w+") is not supported, please use full-text index as an optimal solution Scenario: Tag with relational NE filter When profiling query: @@ -292,66 +253,16 @@ Feature: Lookup tag index full scan | 0 | Start | | | Scenario: Tag with relational CONTAINS/NOT CONTAINS filter - When profiling query: - """ - LOOKUP ON team WHERE team.name CONTAINS toLower("ER") - """ - Then the result should be, in any order: - | VertexID | - | "76ers" | - | "Trail Blazers" | - | "Timberwolves" | - | "Cavaliers" | - | "Thunders" | - | "Clippers" | - | "Pacers" | - | "Mavericks" | - | "Lakers" | - And the execution plan should be: - | id | name | dependencies | operator info | - | 3 | Project | 2 | | - | 2 | Filter | 4 | {"condition": "(team.name CONTAINS \"er\")"} | - | 4 | TagIndexFullScan | 0 | | - | 0 | Start | | | When executing query: """ LOOKUP ON team WHERE team.name CONTAINS "ABC" """ - Then the result should be, in any order: - | VertexID | - When profiling query: + Then a SemanticError should be raised at runtime: Expression (team.name CONTAINS "ABC") is not supported, please use full-text index as an optimal solution + When executing query: """ - LOOKUP ON team WHERE team.name NOT CONTAINS toLower("ER") + LOOKUP ON team WHERE team.name NOT CONTAINS "ABC" """ - Then the result should be, in any order: - | VertexID | - | "Wizards" | - | "Bucks" | - | "Bulls" | - | "Warriors" | - | "Celtics" | - | "Suns" | - | "Grizzlies" | - | "Hawks" | - | "Heat" | - | "Hornets" | - | "Jazz" | - | "Kings" | - | "Knicks" | - | "Spurs" | - | "Magic" | - | "Rockets" | - | "Nets" | - | "Nuggets" | - | "Raptors" | - | "Pelicans" | - | "Pistons" | - And the execution plan should be: - | id | name | dependencies | operator info | - | 3 | Project | 2 | | - | 2 | Filter | 4 | {"condition": "(team.name NOT CONTAINS \"er\")"} | - | 4 | TagIndexFullScan | 0 | | - | 0 | Start | | | + Then a SemanticError should be raised at runtime: Expression (team.name NOT CONTAINS "ABC") is not supported, please use full-text index as an optimal solution Scenario: Tag with relational STARTS/NOT STARTS WITH filter When profiling query: @@ -421,68 +332,13 @@ Feature: Lookup tag index full scan | 0 | Start | | | Scenario: Tag with relational ENDS/NOT ENDS WITH filter - When profiling query: - """ - LOOKUP ON team WHERE team.name ENDS WITH toLower("S") - """ - Then the result should be, in any order: - | VertexID | - | "76ers" | - | "Bucks" | - | "Bulls" | - | "Cavaliers" | - | "Celtics" | - | "Clippers" | - | "Grizzlies" | - | "Hawks" | - | "Wizards" | - | "Hornets" | - | "Warriors" | - | "Kings" | - | "Knicks" | - | "Lakers" | - | "Trail Blazers" | - | "Mavericks" | - | "Nets" | - | "Nuggets" | - | "Pacers" | - | "Pelicans" | - | "Pistons" | - | "Raptors" | - | "Rockets" | - | "Spurs" | - | "Suns" | - | "Thunders" | - | "Timberwolves" | - And the execution plan should be: - | id | name | dependencies | operator info | - | 3 | Project | 2 | | - | 2 | Filter | 4 | {"condition": "(team.name ENDS WITH \"s\")"} | - | 4 | TagIndexFullScan | 0 | | - | 0 | Start | | | When executing query: """ - LOOKUP ON team WHERE team.name ENDS WITH "ABC" + LOOKUP ON team WHERE team.name ENDS WITH toLower("S") """ - Then the result should be, in any order: - | VertexID | + Then a SemanticError should be raised at runtime: Expression (team.name ENDS WITH toLower("S")) is not supported, please use full-text index as an optimal solution When executing query: - """ - LOOKUP ON team WHERE team.name ENDS WITH 123 - """ - Then a SemanticError should be raised at runtime: Column type error : name - When profiling query: """ LOOKUP ON team WHERE team.name NOT ENDS WITH toLower("S") """ - Then the result should be, in any order: - | VertexID | - | "Magic" | - | "Jazz" | - | "Heat" | - And the execution plan should be: - | id | name | dependencies | operator info | - | 3 | Project | 2 | | - | 2 | Filter | 4 | {"condition": "(team.name NOT ENDS WITH \"s\")"} | - | 4 | TagIndexFullScan | 0 | | - | 0 | Start | | | + Then a SemanticError should be raised at runtime: Expression (team.name NOT ENDS WITH toLower("S")) is not supported, please use full-text index as an optimal solution From 959d7c6cb121a07ed16be87d9ebfc753026e2aa4 Mon Sep 17 00:00:00 2001 From: Aiee <18348405+Aiee@users.noreply.github.com> Date: Thu, 23 Sep 2021 23:15:21 +0800 Subject: [PATCH 6/8] Address comments --- src/graph/validator/LookupValidator.cpp | 2 +- .../features/lookup/EdgeIndexFullScan.feature | 98 ++++++++++-- .../features/lookup/TagIndexFullScan.feature | 144 +++++++++++++----- 3 files changed, 186 insertions(+), 58 deletions(-) diff --git a/src/graph/validator/LookupValidator.cpp b/src/graph/validator/LookupValidator.cpp index 023bf2a32bc..4f5b5b2cd2c 100644 --- a/src/graph/validator/LookupValidator.cpp +++ b/src/graph/validator/LookupValidator.cpp @@ -255,7 +255,7 @@ StatusOr LookupValidator::checkFilter(Expression* expr) { // Only starts with can be pushed down as a range scan, so forbid other string-related relExpr if (expr->kind() == ExprKind::kRelREG || expr->kind() == ExprKind::kContains || expr->kind() == ExprKind::kNotContains || expr->kind() == ExprKind::kEndsWith || - expr->kind() == ExprKind::kNotEndsWith) { + expr->kind() == ExprKind::kNotStartsWith || expr->kind() == ExprKind::kNotEndsWith) { return Status::SemanticError( "Expression %s is not supported, please use full-text index as an optimal solution", expr->toString().c_str()); diff --git a/tests/tck/features/lookup/EdgeIndexFullScan.feature b/tests/tck/features/lookup/EdgeIndexFullScan.feature index a2e6b49ea35..d07348979c3 100644 --- a/tests/tck/features/lookup/EdgeIndexFullScan.feature +++ b/tests/tck/features/lookup/EdgeIndexFullScan.feature @@ -70,7 +70,7 @@ Feature: Lookup edge index full scan | 4 | EdgeIndexFullScan | 0 | | | 0 | Start | | | - Scenario: Edge with relational IN/NOT IN filter + Scenario: Edge with simple relational IN filter When profiling query: """ LOOKUP ON edge_1 WHERE edge_1.col1_str IN ["Red", "Yellow"] YIELD edge_1.col1_str @@ -151,12 +151,30 @@ Feature: Lookup edge index full scan | 3 | Project | 4 | | | 4 | EdgeIndexPrefixScan | 0 | | | 0 | Start | | | - # a IN b AND c IN d + + Scenario: Edge with complex relational IN filter + # (a IN b) AND (c IN d) # List has only 1 element, so prefixScan is applied When profiling query: """ LOOKUP ON edge_1 - WHERE edge_1.col2_int IN [11 , 66/2] AND edge_1.col1_str IN [toUpper("r")+"ed1"] + WHERE edge_1.col2_int IN [11 , 33] AND edge_1.col1_str IN ["Red1"] + YIELD edge_1.col1_str, edge_1.col2_int + """ + Then the result should be, in any order: + | SrcVID | DstVID | Ranking | edge_1.col1_str | edge_1.col2_int | + | "101" | "102" | 0 | "Red1" | 11 | + And the execution plan should be: + | id | name | dependencies | operator info | + | 3 | Project | 4 | | + | 4 | IndexScan | 0 | | + | 0 | Start | | | + # (a IN b) AND (c IN d) + # a, c both have indexes (4 prefixScan will be executed) + When profiling query: + """ + LOOKUP ON edge_1 + WHERE edge_1.col2_int IN [11 , 33] AND edge_1.col1_str IN ["Red1", "ABC"] YIELD edge_1.col1_str, edge_1.col2_int """ Then the result should be, in any order: @@ -167,11 +185,23 @@ Feature: Lookup edge index full scan | 3 | Project | 4 | | | 4 | IndexScan | 0 | | | 0 | Start | | | - # a IN b AND c IN d (4 prefixScan will be executed) + # (a IN b) AND (c IN d) + # a, c have a composite index + When executing query: + """ + CREATE EDGE INDEX composite_edge_index ON edge_1(col1_str(20), col2_int); + """ + Then the execution should be successful + And wait 6 seconds + When submit a job: + """ + REBUILD EDGE INDEX composite_edge_index + """ + Then wait the job to finish When profiling query: """ LOOKUP ON edge_1 - WHERE edge_1.col2_int IN [11 , 66/2] AND edge_1.col1_str IN [toUpper("r")+"ed1", "ABC"] + WHERE edge_1.col2_int IN [11 , 33] AND edge_1.col1_str IN ["Red1", "ABC"] YIELD edge_1.col1_str, edge_1.col2_int """ Then the result should be, in any order: @@ -182,6 +212,51 @@ Feature: Lookup edge index full scan | 3 | Project | 4 | | | 4 | IndexScan | 0 | | | 0 | Start | | | + # (a IN b) AND (c IN d) while only a has index + # first drop tag index + When executing query: + """ + DROP EDGE INDEX composite_edge_index + """ + Then the execution should be successful + When executing query: + """ + DROP EDGE INDEX col1_str_index + """ + Then the execution should be successful + And wait 6 seconds + # since the edge index has been dropped, here an EdgeIndexFullScan should be performed + When profiling query: + """ + LOOKUP ON edge_1 + WHERE edge_1.col1_str IN ["Red1", "ABC"] + YIELD edge_1.col1_str, edge_1.col2_int + """ + Then the result should be, in any order: + | SrcVID | DstVID | Ranking | edge_1.col1_str | edge_1.col2_int | + | "101" | "102" | 0 | "Red1" | 11 | + And the execution plan should be: + | id | name | dependencies | operator info | + | 3 | Project | 2 | | + | 2 | Filter | 4 | | + | 4 | EdgeIndexFullScan | 0 | | + | 0 | Start | | | + When profiling query: + """ + LOOKUP ON edge_1 + WHERE edge_1.col2_int IN [11 , 33] AND edge_1.col1_str IN ["Red1", "ABC"] + YIELD edge_1.col1_str, edge_1.col2_int + """ + Then the result should be, in any order: + | SrcVID | DstVID | Ranking | edge_1.col1_str | edge_1.col2_int | + | "101" | "102" | 0 | "Red1" | 11 | + And the execution plan should be: + | id | name | dependencies | operator info | + | 3 | Project | 4 | | + | 4 | IndexScan | 0 | | + | 0 | Start | | | + + Scenario: Edge with relational NOT IN filter When profiling query: """ LOOKUP ON edge_1 WHERE edge_1.col1_str NOT IN ["Blue"] YIELD edge_1.col1_str @@ -249,18 +324,9 @@ Feature: Lookup edge index full scan Then a SemanticError should be raised at runtime: Column type error : col1_str When profiling query: """ - LOOKUP ON edge_1 WHERE edge_1.col1_str NOT STARTS WITH toUpper("r") YIELD edge_1.col1_str + LOOKUP ON edge_1 WHERE edge_1.col1_str NOT STARTS WITH "R" YIELD edge_1.col1_str """ - Then the result should be, in any order: - | SrcVID | DstVID | Ranking | edge_1.col1_str | - | "103" | "101" | 0 | "Blue" | - | "102" | "103" | 0 | "Yellow" | - And the execution plan should be: - | id | name | dependencies | operator info | - | 3 | Project | 2 | | - | 2 | Filter | 4 | {"condition": "(edge_1.col1_str NOT STARTS WITH \"R\")"} | - | 4 | EdgeIndexFullScan | 0 | | - | 0 | Start | | | + Then a SemanticError should be raised at runtime: Expression (edge_1.col1_str NOT STARTS WITH "R") is not supported, please use full-text index as an optimal solution Scenario: Edge with relational ENDS/NOT ENDS WITH filter When executing query: diff --git a/tests/tck/features/lookup/TagIndexFullScan.feature b/tests/tck/features/lookup/TagIndexFullScan.feature index e3b76329de3..85e189d9b0c 100644 --- a/tests/tck/features/lookup/TagIndexFullScan.feature +++ b/tests/tck/features/lookup/TagIndexFullScan.feature @@ -54,7 +54,7 @@ Feature: Lookup tag index full scan | 0 | Start | | | # TODO: Support compare operator info that has multiple column hints - Scenario: Tag with relational IN/NOT IN filter + Scenario: Tag with simple relational IN filter When profiling query: """ LOOKUP ON team WHERE team.name IN ["Hornets", "Jazz"] @@ -89,7 +89,7 @@ Feature: Lookup tag index full scan | 3 | Project | 4 | | | 4 | IndexScan | 0 | | | 0 | Start | | | - # a IN b OR c + # (a IN b) OR c When profiling query: """ LOOKUP ON player WHERE player.age IN [40, 25] OR player.name == "ABC" YIELD player.age @@ -105,7 +105,7 @@ Feature: Lookup tag index full scan | 3 | Project | 4 | | | 4 | IndexScan | 0 | | | 0 | Start | | | - # a IN b OR c IN d + # (a IN b) OR (c IN d) When profiling query: """ LOOKUP ON player WHERE player.age IN [40, 25] OR player.name IN ["Kobe Bryant"] YIELD player.age @@ -121,7 +121,7 @@ Feature: Lookup tag index full scan | 3 | Project | 4 | | | 4 | IndexScan | 0 | | | 0 | Start | | | - # a IN b AND c + # (a IN b) AND c When profiling query: """ LOOKUP ON player WHERE player.age IN [40, 25] AND player.name == "Kobe Bryant" YIELD player.age @@ -134,7 +134,37 @@ Feature: Lookup tag index full scan | 3 | Project | 4 | | | 4 | IndexScan | 0 | | | 0 | Start | | | - # a IN b AND c IN d + When profiling query: + """ + LOOKUP ON player WHERE player.name IN ["Kobe Bryant", "Tim Duncan"] AND player.age > 30 + """ + Then the result should be, in any order: + | VertexID | + | "Kobe Bryant" | + | "Tim Duncan" | + And the execution plan should be: + | id | name | dependencies | operator info | + | 3 | Project | 4 | | + | 4 | IndexScan | 0 | | + | 0 | Start | | | + # c AND (a IN b) + When profiling query: + """ + LOOKUP ON player WHERE player.age IN [40, 25] AND player.name == "Kobe Bryant" YIELD player.age + """ + Then the result should be, in any order: + | VertexID | player.age | + | "Kobe Bryant" | 40 | + And the execution plan should be: + | id | name | dependencies | operator info | + | 3 | Project | 4 | | + | 4 | IndexScan | 0 | | + | 0 | Start | | | + + Scenario: Tag with complex relational IN filter + Given an empty graph + And load "nba" csv data to a new space + # (a IN b) AND (c IN d) while a, c both have indexes When profiling query: """ LOOKUP ON player WHERE player.age IN [40, 25] AND player.name IN ["ABC", "Kobe Bryant"] YIELD player.age @@ -147,6 +177,72 @@ Feature: Lookup tag index full scan | 3 | Project | 4 | | | 4 | IndexScan | 0 | | | 0 | Start | | | + # (a IN b) AND (c IN d) while a, c have a composite index + When executing query: + """ + CREATE TAG INDEX composite_player_name_age_index ON player(name(64), age); + """ + Then the execution should be successful + And wait 6 seconds + When submit a job: + """ + REBUILD TAG INDEX composite_player_name_age_index + """ + Then wait the job to finish + When profiling query: + """ + LOOKUP ON player WHERE player.age IN [40, 25] AND player.name IN ["ABC", "Kobe Bryant"] YIELD player.age + """ + Then the result should be, in any order: + | VertexID | player.age | + | "Kobe Bryant" | 40 | + And the execution plan should be: + | id | name | dependencies | operator info | + | 3 | Project | 4 | | + | 4 | IndexScan | 0 | | + | 0 | Start | | | + # (a IN b) AND (c IN d) while only a has index + # first drop tag index + When executing query: + """ + DROP TAG INDEX composite_player_name_age_index + """ + Then the execution should be successful + When executing query: + """ + DROP TAG INDEX player_name_index + """ + Then the execution should be successful + And wait 6 seconds + # since the tag index has been dropped, here a TagIndexFullScan should be performed + When profiling query: + """ + LOOKUP ON player WHERE player.name IN ["ABC", "Kobe Bryant"] YIELD player.age + """ + Then the result should be, in any order: + | VertexID | player.age | + | "Kobe Bryant" | 40 | + And the execution plan should be: + | id | name | dependencies | operator info | + | 3 | Project | 2 | | + | 2 | Filter | 4 | | + | 4 | TagIndexFullScan | 0 | | + | 0 | Start | | | + When profiling query: + """ + LOOKUP ON player WHERE player.age IN [40, 25] AND player.name IN ["ABC", "Kobe Bryant"] YIELD player.age + """ + Then the result should be, in any order: + | VertexID | player.age | + | "Kobe Bryant" | 40 | + And the execution plan should be: + | id | name | dependencies | operator info | + | 3 | Project | 4 | | + | 4 | IndexScan | 0 | | + | 0 | Start | | | + Then drop the used space + + Scenario: Tag with relational NOT IN filter When profiling query: """ LOOKUP ON team WHERE team.name NOT IN ["Hornets", "Jazz"] @@ -264,7 +360,7 @@ Feature: Lookup tag index full scan """ Then a SemanticError should be raised at runtime: Expression (team.name NOT CONTAINS "ABC") is not supported, please use full-text index as an optimal solution - Scenario: Tag with relational STARTS/NOT STARTS WITH filter + Scenario: Tag with relational STARTS WITH filter When profiling query: """ LOOKUP ON team WHERE team.name STARTS WITH toUpper("t") @@ -295,41 +391,7 @@ Feature: Lookup tag index full scan """ LOOKUP ON team WHERE team.name NOT STARTS WITH toUpper("t") """ - Then the result should be, in any order: - | VertexID | - | "76ers" | - | "Bucks" | - | "Bulls" | - | "Cavaliers" | - | "Celtics" | - | "Clippers" | - | "Grizzlies" | - | "Hawks" | - | "Heat" | - | "Hornets" | - | "Jazz" | - | "Kings" | - | "Knicks" | - | "Lakers" | - | "Magic" | - | "Mavericks" | - | "Nets" | - | "Nuggets" | - | "Pacers" | - | "Pelicans" | - | "Pistons" | - | "Raptors" | - | "Rockets" | - | "Spurs" | - | "Suns" | - | "Wizards" | - | "Warriors" | - And the execution plan should be: - | id | name | dependencies | operator info | - | 3 | Project | 2 | | - | 2 | Filter | 4 | {"condition": "(team.name NOT STARTS WITH \"T\")"} | - | 4 | TagIndexFullScan | 0 | | - | 0 | Start | | | + Then a SemanticError should be raised at runtime: Expression (team.name NOT STARTS WITH toUpper("t")) is not supported, please use full-text index as an optimal solution Scenario: Tag with relational ENDS/NOT ENDS WITH filter When executing query: From 38a22804db411b7f47cbcbcc1bf375ecec6ee961 Mon Sep 17 00:00:00 2001 From: Aiee <18348405+Aiee@users.noreply.github.com> Date: Sun, 26 Sep 2021 15:57:23 +0800 Subject: [PATCH 7/8] Add uts for rewriteInExpr() and rewriteLogicalAndToLogicalOr() --- .../rule/OptimizeTagIndexScanByFilterRule.cpp | 10 +-- src/graph/util/ExpressionUtils.cpp | 2 +- src/graph/util/test/ExpressionUtilsTest.cpp | 75 +++++++++++++++++++ src/graph/validator/LookupValidator.cpp | 4 - 4 files changed, 77 insertions(+), 14 deletions(-) diff --git a/src/graph/optimizer/rule/OptimizeTagIndexScanByFilterRule.cpp b/src/graph/optimizer/rule/OptimizeTagIndexScanByFilterRule.cpp index 77e75735485..b06a149c0aa 100644 --- a/src/graph/optimizer/rule/OptimizeTagIndexScanByFilterRule.cpp +++ b/src/graph/optimizer/rule/OptimizeTagIndexScanByFilterRule.cpp @@ -83,15 +83,7 @@ bool OptimizeTagIndexScanByFilterRule::match(OptContext* ctx, const MatchedResul } // Case2: logical AND expr - if (condition->kind() == ExprKind::kLogicalAnd) { - // for (auto operand : static_cast(condition)->operands()) { - // if (operand->kind() == ExprKind::kRelIn) { - // return false; - // } - // } - return true; - } - return false; + return condition->kind() == ExprKind::kLogicalAnd; } TagIndexScan* makeTagIndexScan(QueryContext* qctx, const TagIndexScan* scan, bool isPrefixScan) { diff --git a/src/graph/util/ExpressionUtils.cpp b/src/graph/util/ExpressionUtils.cpp index 240513c9e92..932a638d561 100644 --- a/src/graph/util/ExpressionUtils.cpp +++ b/src/graph/util/ExpressionUtils.cpp @@ -248,7 +248,7 @@ Expression *ExpressionUtils::rewriteLogicalAndToLogicalOr(const Expression *expr } // orExprOperands is a 2D vector where each sub-vector is the operands of AND expression. - // [[A, C], [A, D], [B, C], [B,D]] => (A and C) or (A and D) or (B and C) or (B or D) + // [[A, C], [A, D], [B, C], [B,D]] => (A and C) or (A and D) or (B and C) or (B and D) std::vector andExprList; andExprList.reserve(orExprOperands.size()); for (auto &operand : orExprOperands) { diff --git a/src/graph/util/test/ExpressionUtilsTest.cpp b/src/graph/util/test/ExpressionUtilsTest.cpp index 95c3033ad26..fa6963000b8 100644 --- a/src/graph/util/test/ExpressionUtilsTest.cpp +++ b/src/graph/util/test/ExpressionUtilsTest.cpp @@ -494,6 +494,81 @@ TEST_F(ExpressionUtilsTest, flattenInnerLogicalExpr) { } } +TEST_F(ExpressionUtilsTest, rewriteInExpr) { + auto elist1 = ExpressionList::make(pool); + (*elist1).add(ConstantExpression::make(pool, 10)).add(ConstantExpression::make(pool, 20)); + auto listExpr1 = ListExpression::make(pool, elist1); + + auto elist2 = ExpressionList::make(pool); + (*elist2).add(ConstantExpression::make(pool, "a")).add(ConstantExpression::make(pool, "b")); + auto listExpr2 = ListExpression::make(pool, elist2); + + auto elist3 = ExpressionList::make(pool); + (*elist3).add(ConstantExpression::make(pool, 100)); + auto listExpr3 = ListExpression::make(pool, elist3); + + // a IN [b,c] -> a==b OR a==c + { + auto inExpr1 = + RelationalExpression::makeIn(pool, ConstantExpression::make(pool, 10), listExpr1); + auto orExpr1 = ExpressionUtils::rewriteInExpr(inExpr1); + auto expected1 = LogicalExpression::makeOr( + pool, + RelationalExpression::makeEQ( + pool, ConstantExpression::make(pool, 10), ConstantExpression::make(pool, 10)), + RelationalExpression::makeEQ( + pool, ConstantExpression::make(pool, 10), ConstantExpression::make(pool, 20))); + ASSERT_EQ(*expected1, *orExpr1); + + auto inExpr2 = + RelationalExpression::makeIn(pool, ConstantExpression::make(pool, "abc"), listExpr2); + auto orExpr2 = ExpressionUtils::rewriteInExpr(inExpr2); + auto expected2 = LogicalExpression::makeOr( + pool, + RelationalExpression::makeEQ( + pool, ConstantExpression::make(pool, "abc"), ConstantExpression::make(pool, "a")), + RelationalExpression::makeEQ( + pool, ConstantExpression::make(pool, "abc"), ConstantExpression::make(pool, "b"))); + ASSERT_EQ(*expected2, *orExpr2); + } + + // a IN [b] -> a == b + { + auto inExpr = RelationalExpression::makeIn(pool, ConstantExpression::make(pool, 10), listExpr3); + auto expected = RelationalExpression::makeEQ( + pool, ConstantExpression::make(pool, 10), ConstantExpression::make(pool, 100)); + ASSERT_EQ(*expected, *ExpressionUtils::rewriteInExpr(inExpr)); + } +} + +TEST_F(ExpressionUtilsTest, rewriteLogicalAndToLogicalOr) { + auto orExpr1 = LogicalExpression::makeOr( + pool, ConstantExpression::make(pool, 10), ConstantExpression::make(pool, 20)); + auto orExpr2 = LogicalExpression::makeOr( + pool, ConstantExpression::make(pool, "a"), ConstantExpression::make(pool, "b")); + + // (a OR b) AND (c OR d) -> (a AND c) OR (a AND d) OR (b AND c) OR (b AND d) + { + auto andExpr = LogicalExpression::makeAnd(pool, orExpr1, orExpr2); + auto transformedExpr = ExpressionUtils::rewriteLogicalAndToLogicalOr(andExpr); + + std::vector orOperands = { + LogicalExpression::makeAnd( + pool, ConstantExpression::make(pool, 10), ConstantExpression::make(pool, "a")), + LogicalExpression::makeAnd( + pool, ConstantExpression::make(pool, 10), ConstantExpression::make(pool, "b")), + LogicalExpression::makeAnd( + pool, ConstantExpression::make(pool, 20), ConstantExpression::make(pool, "a")), + LogicalExpression::makeAnd( + pool, ConstantExpression::make(pool, 20), ConstantExpression::make(pool, "b"))}; + + auto expected = LogicalExpression::makeOr(pool); + expected->setOperands(orOperands); + + ASSERT_EQ(*expected, *transformedExpr); + } +} + TEST_F(ExpressionUtilsTest, splitFilter) { using Kind = Expression::Kind; { diff --git a/src/graph/validator/LookupValidator.cpp b/src/graph/validator/LookupValidator.cpp index 4f5b5b2cd2c..98e2f77cdd9 100644 --- a/src/graph/validator/LookupValidator.cpp +++ b/src/graph/validator/LookupValidator.cpp @@ -234,10 +234,6 @@ StatusOr LookupValidator::handleLogicalExprOperands(LogicalExpressi auto& operands = lExpr->operands(); for (auto i = 0u; i < operands.size(); i++) { auto operand = lExpr->operand(i); - // if (operand->isLogicalExpr()) { - // // Not allow different logical expression to use: A AND B OR C - // return Status::SemanticError("Not supported filter: %s", lExpr->toString().c_str()); - // } auto ret = checkFilter(operand); NG_RETURN_IF_ERROR(ret); From a8ce12a4ea28e36f34a5d2d69fe86bb3a5c90905 Mon Sep 17 00:00:00 2001 From: Aiee <18348405+Aiee@users.noreply.github.com> Date: Wed, 29 Sep 2021 17:22:23 +0800 Subject: [PATCH 8/8] Check foldability of container expr in VidExtractVisitor --- src/graph/validator/LookupValidator.cpp | 2 +- src/graph/visitor/VidExtractVisitor.cpp | 3 ++- tests/tck/features/match/SeekById.feature | 16 ++++++++++++++++ tests/tck/features/match/SeekById.intVid.feature | 16 ++++++++++++++++ 4 files changed, 35 insertions(+), 2 deletions(-) diff --git a/src/graph/validator/LookupValidator.cpp b/src/graph/validator/LookupValidator.cpp index 98e2f77cdd9..b96a0f6cafe 100644 --- a/src/graph/validator/LookupValidator.cpp +++ b/src/graph/validator/LookupValidator.cpp @@ -188,7 +188,7 @@ Status LookupValidator::validateYield() { return Status::OK(); } lookupCtx_->dedup = yieldClause->isDistinct(); - + if (lookupCtx_->isEdge) { NG_RETURN_IF_ERROR(validateYieldEdge()); } else { diff --git a/src/graph/visitor/VidExtractVisitor.cpp b/src/graph/visitor/VidExtractVisitor.cpp index 22be58bbbcc..d37a81c18b5 100644 --- a/src/graph/visitor/VidExtractVisitor.cpp +++ b/src/graph/visitor/VidExtractVisitor.cpp @@ -145,7 +145,8 @@ void VidExtractVisitor::visit(RelationalExpression *expr) { } if (expr->left()->kind() != Expression::Kind::kFunctionCall || - expr->right()->kind() != Expression::Kind::kList) { + expr->right()->kind() != Expression::Kind::kList || + !ExpressionUtils::isEvaluableExpr(expr->right())) { vidPattern_ = VidPattern{}; return; } diff --git a/tests/tck/features/match/SeekById.feature b/tests/tck/features/match/SeekById.feature index ebdba05960a..e4793ecea7b 100644 --- a/tests/tck/features/match/SeekById.feature +++ b/tests/tck/features/match/SeekById.feature @@ -138,6 +138,15 @@ Feature: Match seek by id Then the result should be, in any order: | Name | | 'James Harden' | + When executing query: + """ + MATCH (v:player) + WHERE id(v) IN ['James Harden', v.age] + RETURN v.name AS Name + """ + Then the result should be, in any order: + | Name | + | 'James Harden' | Scenario: complicate logical When executing query: @@ -251,6 +260,13 @@ Feature: Match seek by id RETURN v.name AS Name """ Then a SemanticError should be raised at runtime: + When executing query: + """ + MATCH (v) + WHERE id(v) IN ['James Harden', v.name] + RETURN v.name AS Name + """ + Then a SemanticError should be raised at runtime: Scenario: Start from end When executing query: diff --git a/tests/tck/features/match/SeekById.intVid.feature b/tests/tck/features/match/SeekById.intVid.feature index d1d427c3229..02a9f94c806 100644 --- a/tests/tck/features/match/SeekById.intVid.feature +++ b/tests/tck/features/match/SeekById.intVid.feature @@ -138,6 +138,15 @@ Feature: Match seek by id Then the result should be, in any order: | Name | | 'James Harden' | + When executing query: + """ + MATCH (v:player) + WHERE id(v) IN [hash('James Harden'), v.age] + RETURN v.name AS Name + """ + Then the result should be, in any order: + | Name | + | 'James Harden' | Scenario: complicate logical When executing query: @@ -244,6 +253,13 @@ Feature: Match seek by id RETURN v.name AS Name """ Then a SemanticError should be raised at runtime: + When executing query: + """ + MATCH (v) + WHERE id(v) IN [hash('James Harden'), v.name] + RETURN v.name AS Name + """ + Then a SemanticError should be raised at runtime: Scenario: with arithmetic When executing query: