From 0e4af6cd48930a071ae09b975473c2416dc5dcd4 Mon Sep 17 00:00:00 2001 From: Arenatlx <314806019@qq.com> Date: Tue, 23 Aug 2022 13:14:20 +0800 Subject: [PATCH] planner: fix outer join reorder will push down its outer join condition (#37245) close pingcap/tidb#37238 --- cmd/explaintest/r/explain_complex.result | 25 ++++++++ cmd/explaintest/t/explain_complex.test | 16 +++++ planner/core/rule_join_reorder.go | 78 ++++++++++++++++++------ 3 files changed, 102 insertions(+), 17 deletions(-) diff --git a/cmd/explaintest/r/explain_complex.result b/cmd/explaintest/r/explain_complex.result index 43f7c0f915f14..b68393c99fcfe 100644 --- a/cmd/explaintest/r/explain_complex.result +++ b/cmd/explaintest/r/explain_complex.result @@ -261,3 +261,28 @@ Sort 1.00 root test.org_department.left_value └─TableReader(Probe) 9.99 root data:Selection └─Selection 9.99 cop[tikv] eq(test.org_employee_position.status, 1000), not(isnull(test.org_employee_position.position_id)) └─TableFullScan 10000.00 cop[tikv] table:ep keep order:false, stats:pseudo +create table test.Tab_A (id int primary key,bid int,cid int,name varchar(20),type varchar(20),num int,amt decimal(11,2)); +create table test.Tab_B (id int primary key,name varchar(20)); +create table test.Tab_C (id int primary key,name varchar(20),amt decimal(11,2)); +insert into test.Tab_A values(2,2,2,'A01','01',112,111); +insert into test.Tab_A values(4,4,4,'A02','02',112,111); +insert into test.Tab_B values(2,'B01'); +insert into test.Tab_B values(4,'B02'); +insert into test.Tab_C values(2,'C01',22); +insert into test.Tab_C values(4,'C01',5); +explain select Tab_A.name AAA,Tab_B.name BBB,Tab_A.amt Aamt, Tab_C.amt Bamt,IFNULL(Tab_C.amt, 0) FROM Tab_A left join Tab_B on Tab_A.bid=Tab_B.id left join Tab_C on Tab_A.cid=Tab_C.id and Tab_A.type='01' where Tab_A.num=112; +id estRows task access object operator info +Projection_8 15.62 root test.tab_a.name, test.tab_b.name, test.tab_a.amt, test.tab_c.amt, ifnull(test.tab_c.amt, 0)->Column#13 +└─IndexJoin_13 15.62 root left outer join, inner:TableReader_10, outer key:test.tab_a.cid, inner key:test.tab_c.id, equal cond:eq(test.tab_a.cid, test.tab_c.id), left cond:eq(test.tab_a.type, "01") + ├─IndexJoin_24(Build) 12.50 root left outer join, inner:TableReader_21, outer key:test.tab_a.bid, inner key:test.tab_b.id, equal cond:eq(test.tab_a.bid, test.tab_b.id) + │ ├─TableReader_33(Build) 10.00 root data:Selection_32 + │ │ └─Selection_32 10.00 cop[tikv] eq(test.tab_a.num, 112) + │ │ └─TableFullScan_31 10000.00 cop[tikv] table:Tab_A keep order:false, stats:pseudo + │ └─TableReader_21(Probe) 1.00 root data:TableRangeScan_20 + │ └─TableRangeScan_20 1.00 cop[tikv] table:Tab_B range: decided by [test.tab_a.bid], keep order:false, stats:pseudo + └─TableReader_10(Probe) 1.00 root data:TableRangeScan_9 + └─TableRangeScan_9 1.00 cop[tikv] table:Tab_C range: decided by [test.tab_a.cid], keep order:false, stats:pseudo +select Tab_A.name AAA,Tab_B.name BBB,Tab_A.amt Aamt, Tab_C.amt Bamt,IFNULL(Tab_C.amt, 0) FROM Tab_A left join Tab_B on Tab_A.bid=Tab_B.id left join Tab_C on Tab_A.cid=Tab_C.id and Tab_A.type='01' where Tab_A.num=112; +AAA BBB Aamt Bamt IFNULL(Tab_C.amt, 0) +A01 B01 111.00 22.00 22.00 +A02 B02 111.00 NULL 0 diff --git a/cmd/explaintest/t/explain_complex.test b/cmd/explaintest/t/explain_complex.test index 2447db8f2d53c..39a2baa357f1a 100644 --- a/cmd/explaintest/t/explain_complex.test +++ b/cmd/explaintest/t/explain_complex.test @@ -174,3 +174,19 @@ CREATE TABLE org_position ( ) ENGINE=InnoDB DEFAULT CHARSET=utf8; explain format = 'brief' SELECT d.id, d.ctx, d.name, d.left_value, d.right_value, d.depth, d.leader_id, d.status, d.created_on, d.updated_on FROM org_department AS d LEFT JOIN org_position AS p ON p.department_id = d.id AND p.status = 1000 LEFT JOIN org_employee_position AS ep ON ep.position_id = p.id AND ep.status = 1000 WHERE (d.ctx = 1 AND (ep.user_id = 62 OR d.id = 20 OR d.id = 20) AND d.status = 1000) GROUP BY d.id ORDER BY d.left_value; + +create table test.Tab_A (id int primary key,bid int,cid int,name varchar(20),type varchar(20),num int,amt decimal(11,2)); +create table test.Tab_B (id int primary key,name varchar(20)); +create table test.Tab_C (id int primary key,name varchar(20),amt decimal(11,2)); + +insert into test.Tab_A values(2,2,2,'A01','01',112,111); +insert into test.Tab_A values(4,4,4,'A02','02',112,111); +insert into test.Tab_B values(2,'B01'); +insert into test.Tab_B values(4,'B02'); +insert into test.Tab_C values(2,'C01',22); +insert into test.Tab_C values(4,'C01',5); + +explain select Tab_A.name AAA,Tab_B.name BBB,Tab_A.amt Aamt, Tab_C.amt Bamt,IFNULL(Tab_C.amt, 0) FROM Tab_A left join Tab_B on Tab_A.bid=Tab_B.id left join Tab_C on Tab_A.cid=Tab_C.id and Tab_A.type='01' where Tab_A.num=112; + +select Tab_A.name AAA,Tab_B.name BBB,Tab_A.amt Aamt, Tab_C.amt Bamt,IFNULL(Tab_C.amt, 0) FROM Tab_A left join Tab_B on Tab_A.bid=Tab_B.id left join Tab_C on Tab_A.cid=Tab_C.id and Tab_A.type='01' where Tab_A.num=112; + diff --git a/planner/core/rule_join_reorder.go b/planner/core/rule_join_reorder.go index 20b3e1bcbf201..2c3b603db12c9 100644 --- a/planner/core/rule_join_reorder.go +++ b/planner/core/rule_join_reorder.go @@ -34,7 +34,7 @@ import ( // For example: "InnerJoin(InnerJoin(a, b), LeftJoin(c, d))" // results in a join group {a, b, c, d}. func extractJoinGroup(p LogicalPlan) (group []LogicalPlan, eqEdges []*expression.ScalarFunction, - otherConds []expression.Expression, joinTypes []JoinType, hintInfo []*tableHintInfo, hasOuterJoin bool) { + otherConds []expression.Expression, joinTypes []*joinTypeWithExtMsg, hintInfo []*tableHintInfo, hasOuterJoin bool) { join, isJoin := p.(*LogicalJoin) if isJoin && join.preferJoinOrder { // When there is a leading hint, the hint may not take effect for other reasons. @@ -129,12 +129,26 @@ func extractJoinGroup(p LogicalPlan) (group []LogicalPlan, eqEdges []*expression } eqEdges = append(eqEdges, join.EqualConditions...) - otherConds = append(otherConds, join.OtherConditions...) - otherConds = append(otherConds, join.LeftConditions...) - otherConds = append(otherConds, join.RightConditions...) - for range join.EqualConditions { - joinTypes = append(joinTypes, join.JoinType) + tmpOtherConds := make(expression.CNFExprs, 0, len(join.OtherConditions)+len(join.LeftConditions)+len(join.RightConditions)) + tmpOtherConds = append(tmpOtherConds, join.OtherConditions...) + tmpOtherConds = append(tmpOtherConds, join.LeftConditions...) + tmpOtherConds = append(tmpOtherConds, join.RightConditions...) + if join.JoinType == LeftOuterJoin || join.JoinType == RightOuterJoin { + for range join.EqualConditions { + abType := &joinTypeWithExtMsg{JoinType: join.JoinType} + // outer join's other condition should be bound with the connecting edge. + // although we bind the outer condition to **anyone** of the join type, it will be extracted **only once** when make a new join. + abType.outerBindCondition = tmpOtherConds + joinTypes = append(joinTypes, abType) + } + } else { + for range join.EqualConditions { + abType := &joinTypeWithExtMsg{JoinType: join.JoinType} + joinTypes = append(joinTypes, abType) + } + otherConds = append(otherConds, tmpOtherConds...) } + return group, eqEdges, otherConds, joinTypes, hintInfo, hasOuterJoin } @@ -146,6 +160,11 @@ type jrNode struct { cumCost float64 } +type joinTypeWithExtMsg struct { + JoinType + outerBindCondition []expression.Expression +} + func (s *joinReOrderSolver) optimize(_ context.Context, p LogicalPlan, opt *logicalOptimizeOp) (LogicalPlan, error) { tracer := &joinReorderTrace{cost: map[string]float64{}, opt: opt} tracer.traceJoinReorder(p) @@ -172,7 +191,7 @@ func (s *joinReOrderSolver) optimizeRecursive(ctx sessionctx.Context, p LogicalP // Not support outer join reorder when using the DP algorithm isSupportDP := true for _, joinType := range joinTypes { - if joinType != InnerJoin { + if joinType.JoinType != InnerJoin { isSupportDP = false break } @@ -290,7 +309,7 @@ type baseSingleGroupJoinOrderSolver struct { curJoinGroup []*jrNode otherConds []expression.Expression eqEdges []*expression.ScalarFunction - joinTypes []JoinType + joinTypes []*joinTypeWithExtMsg leadingJoinGroup LogicalPlan } @@ -318,7 +337,7 @@ func (s *baseSingleGroupJoinOrderSolver) generateLeadingJoinGroup(curJoinGroup [ leadingJoinGroup = leadingJoinGroup[1:] for len(leadingJoinGroup) > 0 { var usedEdges []*expression.ScalarFunction - var joinType JoinType + var joinType *joinTypeWithExtMsg leadingJoin, leadingJoinGroup[0], usedEdges, joinType = s.checkConnection(leadingJoin, leadingJoinGroup[0]) if hasOuterJoin && usedEdges == nil { // If the joinGroups contain the outer join, we disable the cartesian product. @@ -359,8 +378,8 @@ func (s *baseSingleGroupJoinOrderSolver) baseNodeCumCost(groupNode LogicalPlan) } // checkConnection used to check whether two nodes have equal conditions or not. -func (s *baseSingleGroupJoinOrderSolver) checkConnection(leftPlan, rightPlan LogicalPlan) (leftNode, rightNode LogicalPlan, usedEdges []*expression.ScalarFunction, joinType JoinType) { - joinType = InnerJoin +func (s *baseSingleGroupJoinOrderSolver) checkConnection(leftPlan, rightPlan LogicalPlan) (leftNode, rightNode LogicalPlan, usedEdges []*expression.ScalarFunction, joinType *joinTypeWithExtMsg) { + joinType = &joinTypeWithExtMsg{JoinType: InnerJoin} leftNode, rightNode = leftPlan, rightPlan for idx, edge := range s.eqEdges { lCol := edge.GetArgs()[0].(*expression.Column) @@ -370,7 +389,7 @@ func (s *baseSingleGroupJoinOrderSolver) checkConnection(leftPlan, rightPlan Log usedEdges = append(usedEdges, edge) } else if rightPlan.Schema().Contains(lCol) && leftPlan.Schema().Contains(rCol) { joinType = s.joinTypes[idx] - if joinType != InnerJoin { + if joinType.JoinType != InnerJoin { rightNode, leftNode = leftPlan, rightPlan usedEdges = append(usedEdges, edge) } else { @@ -383,12 +402,19 @@ func (s *baseSingleGroupJoinOrderSolver) checkConnection(leftPlan, rightPlan Log } // makeJoin build join tree for the nodes which have equal conditions to connect them. -func (s *baseSingleGroupJoinOrderSolver) makeJoin(leftPlan, rightPlan LogicalPlan, eqEdges []*expression.ScalarFunction, joinType JoinType) (LogicalPlan, []expression.Expression) { +func (s *baseSingleGroupJoinOrderSolver) makeJoin(leftPlan, rightPlan LogicalPlan, eqEdges []*expression.ScalarFunction, joinType *joinTypeWithExtMsg) (LogicalPlan, []expression.Expression) { remainOtherConds := make([]expression.Expression, len(s.otherConds)) copy(remainOtherConds, s.otherConds) - var otherConds []expression.Expression - var leftConds []expression.Expression - var rightConds []expression.Expression + var ( + otherConds []expression.Expression + leftConds []expression.Expression + rightConds []expression.Expression + + // for outer bind conditions + obOtherConds []expression.Expression + obLeftConds []expression.Expression + obRightConds []expression.Expression + ) mergedSchema := expression.MergeSchema(leftPlan.Schema(), rightPlan.Schema()) remainOtherConds, leftConds = expression.FilterOutInPlace(remainOtherConds, func(expr expression.Expression) bool { @@ -400,7 +426,25 @@ func (s *baseSingleGroupJoinOrderSolver) makeJoin(leftPlan, rightPlan LogicalPla remainOtherConds, otherConds = expression.FilterOutInPlace(remainOtherConds, func(expr expression.Expression) bool { return expression.ExprFromSchema(expr, mergedSchema) }) - return s.newJoinWithEdges(leftPlan, rightPlan, eqEdges, otherConds, leftConds, rightConds, joinType), remainOtherConds + if len(joinType.outerBindCondition) > 0 { + remainOBOtherConds := make([]expression.Expression, len(joinType.outerBindCondition)) + copy(remainOBOtherConds, joinType.outerBindCondition) + remainOBOtherConds, obLeftConds = expression.FilterOutInPlace(remainOBOtherConds, func(expr expression.Expression) bool { + return expression.ExprFromSchema(expr, leftPlan.Schema()) && !expression.ExprFromSchema(expr, rightPlan.Schema()) + }) + remainOBOtherConds, obRightConds = expression.FilterOutInPlace(remainOBOtherConds, func(expr expression.Expression) bool { + return expression.ExprFromSchema(expr, rightPlan.Schema()) && !expression.ExprFromSchema(expr, leftPlan.Schema()) + }) + // _ here make the linter happy. + _, obOtherConds = expression.FilterOutInPlace(remainOBOtherConds, func(expr expression.Expression) bool { + return expression.ExprFromSchema(expr, mergedSchema) + }) + // case like: (A * B) left outer join C on (A.a = C.a && B.b > 0) will remain B.b > 0 in remainOBOtherConds (while this case + // has been forbidden by: filters of the outer join is related with multiple leaves of the outer join side in #34603) + // so noway here we got remainOBOtherConds remained. + } + return s.newJoinWithEdges(leftPlan, rightPlan, eqEdges, + append(otherConds, obOtherConds...), append(leftConds, obLeftConds...), append(rightConds, obRightConds...), joinType.JoinType), remainOtherConds } // makeBushyJoin build bushy tree for the nodes which have no equal condition to connect them.