From 52064dd41436b3d831392e4dff255355baf4e8bd Mon Sep 17 00:00:00 2001 From: starocean999 <40539150+starocean999@users.noreply.github.com> Date: Thu, 16 Nov 2023 22:17:30 +0800 Subject: [PATCH] [fix](planner)scan node should project all required expr from parent node #26886 (#27096) --- .../doris/analysis/TupleDescriptor.java | 18 +++++ .../apache/doris/planner/HashJoinNode.java | 8 +-- .../apache/doris/planner/JoinNodeBase.java | 8 +-- .../org/apache/doris/planner/ScanNode.java | 46 +++++++++++++ .../doris/planner/JoinCostEvaluationTest.java | 4 +- .../doris/planner/SingleNodePlannerTest.java | 40 ++++++------ .../test_inlineview_with_project.out | 3 + .../test_inlineview_with_project.groovy | 65 +++++++++++++++++++ 8 files changed, 162 insertions(+), 30 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/TupleDescriptor.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/TupleDescriptor.java index 0bc2d28fe2f0c6..e4e130fc6fe888 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/TupleDescriptor.java +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/TupleDescriptor.java @@ -146,6 +146,24 @@ public ArrayList getMaterializedSlots() { return result; } + public ArrayList getMaterializedSlotIds() { + ArrayList result = Lists.newArrayList(); + for (SlotDescriptor slot : slots) { + if (slot.isMaterialized()) { + result.add(slot.getId()); + } + } + return result; + } + + public ArrayList getAllSlotIds() { + ArrayList result = Lists.newArrayList(); + for (SlotDescriptor slot : slots) { + result.add(slot.getId()); + } + return result; + } + /** * Return slot descriptor corresponding to column referenced in the context * of tupleDesc, or null if no such reference exists. diff --git a/fe/fe-core/src/main/java/org/apache/doris/planner/HashJoinNode.java b/fe/fe-core/src/main/java/org/apache/doris/planner/HashJoinNode.java index 02418fc01ac31c..279841810c5f01 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/planner/HashJoinNode.java +++ b/fe/fe-core/src/main/java/org/apache/doris/planner/HashJoinNode.java @@ -94,12 +94,12 @@ public HashJoinNode(PlanNodeId id, PlanNode outer, PlanNode inner, TableRef inne if (joinOp.equals(JoinOperator.LEFT_ANTI_JOIN) || joinOp.equals(JoinOperator.LEFT_SEMI_JOIN) || joinOp.equals(JoinOperator.NULL_AWARE_LEFT_ANTI_JOIN)) { - tupleIds.addAll(outer.getTupleIds()); + tupleIds.addAll(outer.getOutputTupleIds()); } else if (joinOp.equals(JoinOperator.RIGHT_ANTI_JOIN) || joinOp.equals(JoinOperator.RIGHT_SEMI_JOIN)) { - tupleIds.addAll(inner.getTupleIds()); + tupleIds.addAll(inner.getOutputTupleIds()); } else { - tupleIds.addAll(outer.getTupleIds()); - tupleIds.addAll(inner.getTupleIds()); + tupleIds.addAll(outer.getOutputTupleIds()); + tupleIds.addAll(inner.getOutputTupleIds()); } for (Expr eqJoinPredicate : eqJoinConjuncts) { diff --git a/fe/fe-core/src/main/java/org/apache/doris/planner/JoinNodeBase.java b/fe/fe-core/src/main/java/org/apache/doris/planner/JoinNodeBase.java index fe035d31050ee3..b635cfda59d992 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/planner/JoinNodeBase.java +++ b/fe/fe-core/src/main/java/org/apache/doris/planner/JoinNodeBase.java @@ -77,12 +77,12 @@ public JoinNodeBase(PlanNodeId id, String planNodeName, StatisticalType statisti joinOp = innerRef.getJoinOp(); if (joinOp.equals(JoinOperator.FULL_OUTER_JOIN)) { - nullableTupleIds.addAll(outer.getTupleIds()); - nullableTupleIds.addAll(inner.getTupleIds()); + nullableTupleIds.addAll(outer.getOutputTupleIds()); + nullableTupleIds.addAll(inner.getOutputTupleIds()); } else if (joinOp.equals(JoinOperator.LEFT_OUTER_JOIN)) { - nullableTupleIds.addAll(inner.getTupleIds()); + nullableTupleIds.addAll(inner.getOutputTupleIds()); } else if (joinOp.equals(JoinOperator.RIGHT_OUTER_JOIN)) { - nullableTupleIds.addAll(outer.getTupleIds()); + nullableTupleIds.addAll(outer.getOutputTupleIds()); } this.isMark = this.innerRef != null && innerRef.isMark(); } diff --git a/fe/fe-core/src/main/java/org/apache/doris/planner/ScanNode.java b/fe/fe-core/src/main/java/org/apache/doris/planner/ScanNode.java index 9398dabcd367e2..648eac047d450c 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/planner/ScanNode.java +++ b/fe/fe-core/src/main/java/org/apache/doris/planner/ScanNode.java @@ -53,6 +53,7 @@ import org.apache.doris.thrift.TScanRangeLocations; import com.google.common.base.MoreObjects; +import com.google.common.base.Preconditions; import com.google.common.collect.Lists; import com.google.common.collect.Maps; import com.google.common.collect.Range; @@ -62,6 +63,8 @@ import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; +import java.util.ArrayList; +import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.Set; @@ -81,6 +84,9 @@ public abstract class ScanNode extends PlanNode { protected Analyzer analyzer; protected List scanRangeLocations = Lists.newArrayList(); + // create a mapping between output slot's id and project expr + Map outputSlotToProjectExpr = new HashMap<>(); + public ScanNode(PlanNodeId id, TupleDescriptor desc, String planNodeName, StatisticalType statisticalType) { super(id, desc.getId().asList(), planNodeName, statisticalType); this.desc = desc; @@ -592,6 +598,7 @@ public void setOutputSmap(ExprSubstitutionMap smap, Analyzer analyzer) { } newRhs.add(new SlotRef(slotDesc)); allOutputSlotIds.add(slotDesc.getId()); + outputSlotToProjectExpr.put(slotDesc.getId(), rhsExpr); } else { newRhs.add(rhs.get(i)); } @@ -603,6 +610,45 @@ public void setOutputSmap(ExprSubstitutionMap smap, Analyzer analyzer) { } } + @Override + public void initOutputSlotIds(Set requiredSlotIdSet, Analyzer analyzer) { + if (outputTupleDesc != null && requiredSlotIdSet != null) { + Preconditions.checkNotNull(outputSmap); + ArrayList materializedSlotIds = outputTupleDesc.getMaterializedSlotIds(); + Preconditions.checkState(projectList != null && projectList.size() <= materializedSlotIds.size(), + "projectList's size should be less than materializedSlotIds's size"); + boolean hasNewSlot = false; + if (projectList.size() < materializedSlotIds.size()) { + // need recreate projectList based on materializedSlotIds + hasNewSlot = true; + } + + // find new project expr from outputSmap based on requiredSlotIdSet + ArrayList allSlots = outputTupleDesc.getAllSlotIds(); + for (SlotId slotId : requiredSlotIdSet) { + if (!materializedSlotIds.contains(slotId) && allSlots.contains(slotId)) { + SlotDescriptor slot = outputTupleDesc.getSlot(slotId.asInt()); + for (Expr expr : outputSmap.getRhs()) { + if (expr instanceof SlotRef && ((SlotRef) expr).getSlotId() == slotId) { + slot.setIsMaterialized(true); + outputSlotToProjectExpr.put(slotId, expr.getSrcSlotRef()); + hasNewSlot = true; + } + } + } + } + + if (hasNewSlot) { + // recreate the project list + projectList.clear(); + materializedSlotIds = outputTupleDesc.getMaterializedSlotIds(); + for (SlotId slotId : materializedSlotIds) { + projectList.add(outputSlotToProjectExpr.get(slotId)); + } + } + } + } + public List getOutputTupleIds() { if (outputTupleDesc != null) { return Lists.newArrayList(outputTupleDesc.getId()); diff --git a/fe/fe-core/src/test/java/org/apache/doris/planner/JoinCostEvaluationTest.java b/fe/fe-core/src/test/java/org/apache/doris/planner/JoinCostEvaluationTest.java index 00646b63e8aa7b..a45402516e64df 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/planner/JoinCostEvaluationTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/planner/JoinCostEvaluationTest.java @@ -58,7 +58,7 @@ public class JoinCostEvaluationTest { public void setUp() { new Expectations() { { - node.getTupleIds(); + node.getOutputTupleIds(); result = Lists.newArrayList(); node.getTblRefIds(); result = Lists.newArrayList(); @@ -114,7 +114,7 @@ public void testConstructHashTableSpace() { double nodeArrayLen = 6144; new Expectations() { { - node.getTupleIds(); + node.getOutputTupleIds(); result = new ArrayList<>(Collections.nCopies(rhsNodeTupleIdNum, 0)); } }; diff --git a/fe/fe-core/src/test/java/org/apache/doris/planner/SingleNodePlannerTest.java b/fe/fe-core/src/test/java/org/apache/doris/planner/SingleNodePlannerTest.java index b88e41a595eb45..ce8248de35aa7e 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/planner/SingleNodePlannerTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/planner/SingleNodePlannerTest.java @@ -139,9 +139,9 @@ public void testJoinReorderWithTwoTuple1(@Injectable PlannerContext context, result = eqSlot2; eqSlot2.isBoundByTupleIds(new ArrayList<>()); result = true; - scanNode1.getTupleIds(); + scanNode1.getOutputTupleIds(); result = Lists.newArrayList(); - scanNode2.getTupleIds(); + scanNode2.getOutputTupleIds(); result = Lists.newArrayList(); scanNode1.getOutputSmap(); result = null; @@ -239,9 +239,9 @@ public void testJoinReorderWithTwoTuple2(@Injectable PlannerContext context, result = eqSlot2; eqSlot2.isBoundByTupleIds(new ArrayList<>()); result = true; - scanNode1.getTupleIds(); + scanNode1.getOutputTupleIds(); result = Lists.newArrayList(); - scanNode2.getTupleIds(); + scanNode2.getOutputTupleIds(); result = Lists.newArrayList(); scanNode1.getOutputSmap(); result = null; @@ -337,9 +337,9 @@ public void testJoinReorderWithTwoTuple3(@Injectable PlannerContext context, result = eqSlot2; eqSlot2.isBoundByTupleIds(new ArrayList<>()); result = true; - scanNode1.getTupleIds(); + scanNode1.getOutputTupleIds(); result = Lists.newArrayList(); - scanNode2.getTupleIds(); + scanNode2.getOutputTupleIds(); result = Lists.newArrayList(); scanNode1.getOutputSmap(); result = null; @@ -487,11 +487,11 @@ public void testKeepRightTableRefOnLeftJoin(@Injectable PlannerContext context, result = eqT1Slot1; eqBinaryPredicate3.getChild(1); result = eqT3Slot3; - scanNode1.getTupleIds(); + scanNode1.getOutputTupleIds(); result = tupleIds1; - scanNode2.getTupleIds(); + scanNode2.getOutputTupleIds(); result = tupleIds2; - scanNode3.getTupleIds(); + scanNode3.getOutputTupleIds(); result = tupleId3; scanNode1.getOutputSmap(); result = null; @@ -649,11 +649,11 @@ public void testKeepRightTableRefOnRightJoin(@Injectable PlannerContext context, result = eqT1Slot1; eqBinaryPredicate3.getChild(1); result = eqT3Slot3; - scanNode1.getTupleIds(); + scanNode1.getOutputTupleIds(); result = tupleIds1; - scanNode2.getTupleIds(); + scanNode2.getOutputTupleIds(); result = tupleIds2; - scanNode3.getTupleIds(); + scanNode3.getOutputTupleIds(); result = tupleId3; scanNode1.getOutputSmap(); result = null; @@ -830,13 +830,13 @@ public void testMultiInnerJoinReorderAvoidCrossJoin(@Injectable PlannerContext c scanNode4.getTblRefIds(); result = tupleIds4; - scanNode1.getTupleIds(); + scanNode1.getOutputTupleIds(); result = tupleIds1; - scanNode2.getTupleIds(); + scanNode2.getOutputTupleIds(); result = tupleIds2; - scanNode3.getTupleIds(); + scanNode3.getOutputTupleIds(); result = tupleIds3; - scanNode4.getTupleIds(); + scanNode4.getOutputTupleIds(); result = tupleIds4; scanNode1.getOutputSmap(); result = null; @@ -1037,13 +1037,13 @@ public void testMultiInnerJoinMultiJoinPredicateReorder(@Injectable PlannerConte scanNode4.getTblRefIds(); result = tupleIds4; - scanNode1.getTupleIds(); + scanNode1.getOutputTupleIds(); result = tupleIds1; - scanNode2.getTupleIds(); + scanNode2.getOutputTupleIds(); result = tupleIds2; - scanNode3.getTupleIds(); + scanNode3.getOutputTupleIds(); result = tupleIds3; - scanNode4.getTupleIds(); + scanNode4.getOutputTupleIds(); result = tupleIds4; scanNode1.getOutputSmap(); result = null; diff --git a/regression-test/data/correctness_p0/test_inlineview_with_project.out b/regression-test/data/correctness_p0/test_inlineview_with_project.out index 238ba7ef7e45b1..550b958d4d9da0 100644 --- a/regression-test/data/correctness_p0/test_inlineview_with_project.out +++ b/regression-test/data/correctness_p0/test_inlineview_with_project.out @@ -10,3 +10,6 @@ -- !select4 -- 0.0 +-- !select5 -- +3 + diff --git a/regression-test/suites/correctness_p0/test_inlineview_with_project.groovy b/regression-test/suites/correctness_p0/test_inlineview_with_project.groovy index 58f46f790180b0..be5dd9dddfd4df 100644 --- a/regression-test/suites/correctness_p0/test_inlineview_with_project.groovy +++ b/regression-test/suites/correctness_p0/test_inlineview_with_project.groovy @@ -455,4 +455,69 @@ suite("test_inlineview_with_project") { `t1`.`plate_id` = `t2`.`material_id`) t5 ON 1 = 1 )res;""" + + sql """DROP TABLE IF EXISTS `dr_user_test_t1`;""" + sql """CREATE TABLE `dr_user_test_t1` ( + `caseId` varchar(500) NULL + ) ENGINE=OLAP + UNIQUE KEY(`caseId`) + COMMENT 'OLAP' + DISTRIBUTED BY HASH(`caseId`) BUCKETS 16 + PROPERTIES ( + "replication_allocation" = "tag.location.default: 1" + );""" + + sql """DROP TABLE IF EXISTS `dr_user_test_t2`;""" + sql """CREATE TABLE `dr_user_test_t2` ( + `id` varchar(500) NULL COMMENT 'id', + `caseId` varchar(500) NULL, + `content` text NULL, + `timestamp` datetime NULL + ) ENGINE=OLAP + UNIQUE KEY(`id`) + COMMENT 'OLAP' + DISTRIBUTED BY HASH(`id`) BUCKETS 16 + PROPERTIES ( + "replication_allocation" = "tag.location.default: 1" + );""" + + sql """insert into dr_user_test_t1 values('1'),('2'),('3');""" + sql """insert into dr_user_test_t2 values('1','1','1','2020-02-02 22:22:22'), ('2','2','2','2020-02-02 22:22:22'), ('3','3','3','2020-02-02 22:22:22');""" + + qt_select5 """ + SELECT COUNT(*) + FROM (WITH test_01 AS + (SELECT caseId, + count(judgementDate_labelObject) + FROM + (SELECT CASE_COLUMN_TABLE.caseId AS caseId , + `judgementDate_labelObject` + FROM + (SELECT CASE_ID_TABLE.caseId , + JSON_OBJECT('id', `judgementDate_TABLE`.`judgementDateId`, 'content', `judgementDate_TABLE`.`judgementDate`) AS `judgementDate_labelObject` + FROM + (SELECT DISTINCT caseId + FROM dr_user_test_t1) CASE_ID_TABLE + LEFT JOIN + (SELECT caseId, + id AS `judgementDateId`, + (CASE + WHEN `timestamp` IS NOT NULL THEN + to_date(`timestamp`) + ELSE content END) AS `judgementDate` + FROM dr_user_test_t2) `judgementDate_TABLE` + ON CASE_ID_TABLE.caseId = `judgementDate_TABLE`.caseId + LEFT JOIN + (SELECT caseId, + id AS `xx`, + content AS `xxx` + FROM dr_user_test_t2) `xxxx` + ON CASE_ID_TABLE.caseId = `xxxx`.caseId) CASE_COLUMN_TABLE) AGG_RESULT + GROUP BY caseId) + SELECT caseId + FROM test_01 ) TOTAL; + """ + + sql """DROP TABLE IF EXISTS `dr_user_test_t1`;""" + sql """DROP TABLE IF EXISTS `dr_user_test_t2`;""" }