vesoft-inc · bright-starry-sky · Sep 30, 2021 · Sep 15, 2021 · Sep 18, 2021 · Sep 18, 2021
diff --git a/src/graph/optimizer/OptimizerUtils.cpp b/src/graph/optimizer/OptimizerUtils.cpp
@@ -642,7 +642,12 @@ StatusOr<ScoredColumnHint> selectRelExprIndex(const ColumnDef& field,
   }
 
   auto right = expr->right();
-  DCHECK(right->kind() == Expression::Kind::kConstant);
+  if (expr->kind() == Expression::Kind::kRelIn) {  // container expressions
+    DCHECK(right->isContainerExpr());
+  } else {  // other expressions
+    DCHECK(right->kind() == Expression::Kind::kConstant);
+  }
+
   const auto& value = static_cast<const ConstantExpression*>(right)->value();
 
   ScoredColumnHint hint;
@@ -912,6 +917,32 @@ bool OptimizerUtils::findOptimalIndex(const Expression* condition,
   return true;
 }
 
+// Returns true if some index in `indexItems` has the property of `expr`'s left operand as its
+// first field. The left operand should either be a kEdgeProperty or kTagProperty expr.
+bool OptimizerUtils::relExprHasIndex(
+    const Expression* expr,
+    const std::vector<std::shared_ptr<nebula::meta::cpp2::IndexItem>>& indexItems) {
+  DCHECK(expr->isRelExpr());
+
+  for (const auto& index : indexItems) {
+    const auto& fields = index->get_fields();
+    // An index without fields cannot match the property, but a later index still may, so skip
+    // it rather than reporting "no index" for the whole list.
+    if (fields.empty()) {
+      continue;
+    }
+
+    const auto* left = static_cast<const RelationalExpression*>(expr)->left();
+    DCHECK(left->kind() == Expression::Kind::kEdgeProperty ||
+           left->kind() == Expression::Kind::kTagProperty);
+    if (static_cast<const PropertyExpression*>(left)->prop() == fields[0].get_name()) {
+      return true;
+    }
+  }
+
+  return false;
+}
+
 void OptimizerUtils::copyIndexScanData(const nebula::graph::IndexScan* from,
                                        nebula::graph::IndexScan* to) {
   to->setEmptyResultSet(from->isEmptyResultSet());

diff --git a/src/graph/optimizer/OptimizerUtils.h b/src/graph/optimizer/OptimizerUtils.h
@@ -95,6 +95,10 @@ class OptimizerUtils {
       bool* isPrefixScan,
       nebula::storage::cpp2::IndexQueryContext* ictx);
 
+  static bool relExprHasIndex(  // looks for an index whose first field is expr's left property
+      const Expression* expr,
+      const std::vector<std::shared_ptr<nebula::meta::cpp2::IndexItem>>& indexItems);
+
   static void copyIndexScanData(const nebula::graph::IndexScan* from, nebula::graph::IndexScan* to);
 };
 

diff --git a/src/graph/optimizer/rule/OptimizeTagIndexScanByFilterRule.cpp b/src/graph/optimizer/rule/OptimizeTagIndexScanByFilterRule.cpp
@@ -46,6 +46,15 @@ const Pattern& OptimizeTagIndexScanByFilterRule::pattern() const {
   return pattern;
 }
 
+// Match 2 kinds of expressions:
+//
+// 1. Relational expr. If it is an IN expr, its list MUST have only 1 element, so it can always be
+// transformed to a relEQ expr, e.g.  A in [B]  =>  A == B
+// If the list has more than 1 element, the expr will be matched with UnionAllIndexScanBaseRule.
+//
+// 2. Logical AND expr. If the AND expr contains an operand that is an IN expr, the label attribute
+// in the IN expr SHOULD NOT have a valid index, otherwise the expression should be matched with
+// UnionAllIndexScanBaseRule.
 bool OptimizeTagIndexScanByFilterRule::match(OptContext* ctx, const MatchedResult& matched) const {
   if (!OptRule::match(ctx, matched)) {
     return false;
@@ -58,16 +67,23 @@ bool OptimizeTagIndexScanByFilterRule::match(OptContext* ctx, const MatchedResul
     }
   }
   auto condition = filter->condition();
+
+  // Case1: relational expr
   if (condition->isRelExpr()) {
     auto relExpr = static_cast<const RelationalExpression*>(condition);
+    // If the container in the IN expr has only 1 element, it will be converted to a relEQ
+    // expr. If more than 1 element is found in the container, UnionAllIndexScanBaseRule will be
+    // applied.
+    if (relExpr->kind() == ExprKind::kRelIn && relExpr->right()->isContainerExpr()) {
+      auto ContainerOperands = graph::ExpressionUtils::getContainerExprOperands(relExpr->right());
+      return ContainerOperands.size() == 1;
+    }
     return relExpr->left()->kind() == ExprKind::kTagProperty &&
            relExpr->right()->kind() == ExprKind::kConstant;
   }
-  if (condition->isLogicalExpr()) {
-    return condition->kind() == Expression::Kind::kLogicalAnd;
-  }
 
-  return false;
+  // Case2: logical AND expr
+  return condition->kind() == ExprKind::kLogicalAnd;
 }
 
 TagIndexScan* makeTagIndexScan(QueryContext* qctx, const TagIndexScan* scan, bool isPrefixScan) {
@@ -94,9 +110,38 @@ StatusOr<TransformResult> OptimizeTagIndexScanByFilterRule::transform(
 
   OptimizerUtils::eraseInvalidIndexItems(scan->schemaId(), &indexItems);
 
+  auto condition = filter->condition();
+  auto conditionType = condition->kind();
+  Expression* transformedExpr = condition->clone();
+
+  // Stand alone IN expr with only 1 element in the list, no need to check index
+  if (conditionType == ExprKind::kRelIn) {
+    transformedExpr = graph::ExpressionUtils::rewriteInExpr(condition);
+    DCHECK(transformedExpr->kind() == ExprKind::kRelEQ);
+  }
+
+  // case2: logical AND expr
+  if (condition->kind() == ExprKind::kLogicalAnd) {
+    for (auto& operand : static_cast<const LogicalExpression*>(condition)->operands()) {
+      if (operand->kind() == ExprKind::kRelIn) {
+        auto inExpr = static_cast<RelationalExpression*>(operand);
+        // Do not apply this rule if the IN expr has more than 1 element in its list or if it
+        // has a valid index
+        if (static_cast<ListExpression*>(inExpr->right())->size() > 1) {
+          return TransformResult::noTransform();
+        } else {
+          transformedExpr = graph::ExpressionUtils::rewriteInExpr(condition);
+        }
+        if (OptimizerUtils::relExprHasIndex(inExpr, indexItems)) {
+          return TransformResult::noTransform();
+        }
+      }
+    }
+  }
+
   IndexQueryContext ictx;
   bool isPrefixScan = false;
-  if (!OptimizerUtils::findOptimalIndex(filter->condition(), indexItems, &isPrefixScan, &ictx)) {
+  if (!OptimizerUtils::findOptimalIndex(transformedExpr, indexItems, &isPrefixScan, &ictx)) {
     return TransformResult::noTransform();
   }
 

diff --git a/src/graph/optimizer/rule/UnionAllIndexScanBaseRule.cpp b/src/graph/optimizer/rule/UnionAllIndexScanBaseRule.cpp
@@ -15,6 +15,7 @@
 #include "graph/planner/plan/PlanNode.h"
 #include "graph/planner/plan/Query.h"
 #include "graph/planner/plan/Scan.h"
+#include "graph/util/ExpressionUtils.h"
 #include "interface/gen-cpp2/storage_types.h"
 
 using nebula::graph::Filter;
@@ -24,25 +25,58 @@ using nebula::graph::TagIndexFullScan;
 using nebula::storage::cpp2::IndexQueryContext;
 
 using Kind = nebula::graph::PlanNode::Kind;
+using ExprKind = nebula::Expression::Kind;
 using TransformResult = nebula::opt::OptRule::TransformResult;
 
 namespace nebula {
 namespace opt {
 
+// The matched expression should be either an OR expression or an expression that could be
+// rewritten to an OR expression. There are 3 scenarios.
+//
+// 1. OR expr. If OR expr has an IN expr operand that has a valid index, expand it to OR expr.
+//
+// 2. AND expr such as A in [a, b] AND B when A has a valid index, because it can be transformed to
+// (A==a AND B) OR (A==b AND B)
+//
+// 3. IN expr with its list size > 1, such as A in [a, b] since it can be transformed to (A==a) OR
+// (A==b).
+// If the list has a size of 1, the expr will be matched with OptimizeTagIndexScanByFilterRule.
 bool UnionAllIndexScanBaseRule::match(OptContext* ctx, const MatchedResult& matched) const {
   if (!OptRule::match(ctx, matched)) {
     return false;
   }
   auto filter = static_cast<const Filter*>(matched.planNode());
   auto scan = static_cast<const IndexScan*>(matched.planNode({0, 0}));
   auto condition = filter->condition();
-  if (!condition->isLogicalExpr() || condition->kind() != Expression::Kind::kLogicalOr) {
-    return false;
+  auto conditionType = condition->kind();
+
+  if (condition->isLogicalExpr()) {
+    // Case1: OR Expr
+    if (conditionType == ExprKind::kLogicalOr) {
+      return true;
+    }
+    // Case2: AND Expr
+    if (conditionType == ExprKind::kLogicalAnd &&
+        graph::ExpressionUtils::findAny(static_cast<LogicalExpression*>(condition),
+                                        {ExprKind::kRelIn})) {
+      return true;
+    }
+    // Check logical operands
+    for (auto operand : static_cast<const LogicalExpression*>(condition)->operands()) {
+      if (!operand->isRelExpr() || !operand->isLogicalExpr()) {
+        return false;
+      }
+    }
   }
 
-  for (auto operand : static_cast<const LogicalExpression*>(condition)->operands()) {
-    if (!operand->isRelExpr()) {
-      return false;
+  // If the number of elements is less than or equal to 1, the IN expr will be transformed into a
+  // relEQ expr by the OptimizeTagIndexScanByFilterRule.
+  if (condition->isRelExpr()) {
+    auto relExpr = static_cast<const RelationalExpression*>(condition);
+    if (relExpr->kind() == ExprKind::kRelIn && relExpr->right()->isContainerExpr()) {
+      auto operandsVec = graph::ExpressionUtils::getContainerExprOperands(relExpr->right());
+      return operandsVec.size() > 1;
     }
   }
 
@@ -52,7 +86,7 @@ bool UnionAllIndexScanBaseRule::match(OptContext* ctx, const MatchedResult& matc
     }
   }
 
-  return true;
+  return false;
 }
 
 StatusOr<TransformResult> UnionAllIndexScanBaseRule::transform(OptContext* ctx,
@@ -62,20 +96,77 @@ StatusOr<TransformResult> UnionAllIndexScanBaseRule::transform(OptContext* ctx,
   auto scan = static_cast<const IndexScan*>(node);
 
   auto metaClient = ctx->qctx()->getMetaClient();
-  StatusOr<std::vector<std::shared_ptr<meta::cpp2::IndexItem>>> status;
-  if (node->kind() == graph::PlanNode::Kind::kTagIndexFullScan) {
-    status = metaClient->getTagIndexesFromCache(scan->space());
-  } else {
-    status = metaClient->getEdgeIndexesFromCache(scan->space());
-  }
+  auto status = node->kind() == graph::PlanNode::Kind::kTagIndexFullScan
+                    ? metaClient->getTagIndexesFromCache(scan->space())
+                    : metaClient->getEdgeIndexesFromCache(scan->space());
+
   NG_RETURN_IF_ERROR(status);
   auto indexItems = std::move(status).value();
 
   OptimizerUtils::eraseInvalidIndexItems(scan->schemaId(), &indexItems);
 
+  // Check whether the prop has index.
+  // Rewrite if the property in the IN expr has a valid index
+  if (indexItems.empty()) {
+    return TransformResult::noTransform();
+  }
+
+  auto condition = filter->condition();
+  auto conditionType = condition->kind();
+  Expression* transformedExpr = condition->clone();
+
+  switch (conditionType) {
+    // Stand alone IN expr
+    // If it has multiple elements in the list, check valid index before expanding to OR expr
+    case ExprKind::kRelIn: {
+      if (!OptimizerUtils::relExprHasIndex(condition, indexItems)) {
+        return TransformResult::noTransform();
+      }
+      transformedExpr = graph::ExpressionUtils::rewriteInExpr(condition);
+      break;
+    }
+
+    // AND expr containing IN expr operand
+    case ExprKind::kLogicalAnd: {
+      // Iterate all operands and expand IN exprs if possible
+      for (auto& expr : static_cast<LogicalExpression*>(transformedExpr)->operands()) {
+        if (expr->kind() == ExprKind::kRelIn) {
+          if (OptimizerUtils::relExprHasIndex(expr, indexItems)) {
+            expr = graph::ExpressionUtils::rewriteInExpr(expr);
+          }
+        }
+      }
+
+      // Reconstruct AND expr using distributive law
+      transformedExpr = graph::ExpressionUtils::rewriteLogicalAndToLogicalOr(transformedExpr);
+      break;
+    }
+
+    // OR expr
+    case ExprKind::kLogicalOr: {
+      // Iterate all operands and expand IN exprs if possible
+      for (auto& expr : static_cast<LogicalExpression*>(transformedExpr)->operands()) {
+        if (expr->kind() == ExprKind::kRelIn) {
+          if (OptimizerUtils::relExprHasIndex(expr, indexItems)) {
+            expr = graph::ExpressionUtils::rewriteInExpr(expr);
+          }
+        }
+      }
+      // Flatten OR exprs
+      graph::ExpressionUtils::pullOrs(transformedExpr);
+
+      break;
+    }
+    default:
+      LOG(FATAL) << "Invalid expression kind: " << static_cast<uint8_t>(conditionType);
+      break;
+  }
+
+  DCHECK(transformedExpr->kind() == ExprKind::kLogicalOr ||
+         transformedExpr->kind() == ExprKind::kRelEQ);
   std::vector<IndexQueryContext> idxCtxs;
-  auto condition = static_cast<const LogicalExpression*>(filter->condition());
-  for (auto operand : condition->operands()) {
+  auto logicalExpr = static_cast<const LogicalExpression*>(transformedExpr);
+  for (auto operand : logicalExpr->operands()) {
     IndexQueryContext ictx;
     bool isPrefixScan = false;
     if (!OptimizerUtils::findOptimalIndex(operand, indexItems, &isPrefixScan, &ictx)) {