Skip to content

Commit

Permalink
planner: fix outer join reorder will push down its outer join conditi…
Browse files Browse the repository at this point in the history
…on (#37245)

close #37238
  • Loading branch information
AilinKid authored Aug 23, 2022
1 parent 6206b95 commit 0e4af6c
Show file tree
Hide file tree
Showing 3 changed files with 102 additions and 17 deletions.
25 changes: 25 additions & 0 deletions cmd/explaintest/r/explain_complex.result
Original file line number Diff line number Diff line change
Expand Up @@ -261,3 +261,28 @@ Sort 1.00 root test.org_department.left_value
└─TableReader(Probe) 9.99 root data:Selection
└─Selection 9.99 cop[tikv] eq(test.org_employee_position.status, 1000), not(isnull(test.org_employee_position.position_id))
└─TableFullScan 10000.00 cop[tikv] table:ep keep order:false, stats:pseudo
create table test.Tab_A (id int primary key,bid int,cid int,name varchar(20),type varchar(20),num int,amt decimal(11,2));
create table test.Tab_B (id int primary key,name varchar(20));
create table test.Tab_C (id int primary key,name varchar(20),amt decimal(11,2));
insert into test.Tab_A values(2,2,2,'A01','01',112,111);
insert into test.Tab_A values(4,4,4,'A02','02',112,111);
insert into test.Tab_B values(2,'B01');
insert into test.Tab_B values(4,'B02');
insert into test.Tab_C values(2,'C01',22);
insert into test.Tab_C values(4,'C01',5);
explain select Tab_A.name AAA,Tab_B.name BBB,Tab_A.amt Aamt, Tab_C.amt Bamt,IFNULL(Tab_C.amt, 0) FROM Tab_A left join Tab_B on Tab_A.bid=Tab_B.id left join Tab_C on Tab_A.cid=Tab_C.id and Tab_A.type='01' where Tab_A.num=112;
id estRows task access object operator info
Projection_8 15.62 root test.tab_a.name, test.tab_b.name, test.tab_a.amt, test.tab_c.amt, ifnull(test.tab_c.amt, 0)->Column#13
└─IndexJoin_13 15.62 root left outer join, inner:TableReader_10, outer key:test.tab_a.cid, inner key:test.tab_c.id, equal cond:eq(test.tab_a.cid, test.tab_c.id), left cond:eq(test.tab_a.type, "01")
├─IndexJoin_24(Build) 12.50 root left outer join, inner:TableReader_21, outer key:test.tab_a.bid, inner key:test.tab_b.id, equal cond:eq(test.tab_a.bid, test.tab_b.id)
│ ├─TableReader_33(Build) 10.00 root data:Selection_32
│ │ └─Selection_32 10.00 cop[tikv] eq(test.tab_a.num, 112)
│ │ └─TableFullScan_31 10000.00 cop[tikv] table:Tab_A keep order:false, stats:pseudo
│ └─TableReader_21(Probe) 1.00 root data:TableRangeScan_20
│ └─TableRangeScan_20 1.00 cop[tikv] table:Tab_B range: decided by [test.tab_a.bid], keep order:false, stats:pseudo
└─TableReader_10(Probe) 1.00 root data:TableRangeScan_9
└─TableRangeScan_9 1.00 cop[tikv] table:Tab_C range: decided by [test.tab_a.cid], keep order:false, stats:pseudo
select Tab_A.name AAA,Tab_B.name BBB,Tab_A.amt Aamt, Tab_C.amt Bamt,IFNULL(Tab_C.amt, 0) FROM Tab_A left join Tab_B on Tab_A.bid=Tab_B.id left join Tab_C on Tab_A.cid=Tab_C.id and Tab_A.type='01' where Tab_A.num=112;
AAA BBB Aamt Bamt IFNULL(Tab_C.amt, 0)
A01 B01 111.00 22.00 22.00
A02 B02 111.00 NULL 0
16 changes: 16 additions & 0 deletions cmd/explaintest/t/explain_complex.test
Original file line number Diff line number Diff line change
Expand Up @@ -174,3 +174,19 @@ CREATE TABLE org_position (
) ENGINE=InnoDB DEFAULT CHARSET=utf8;

explain format = 'brief' SELECT d.id, d.ctx, d.name, d.left_value, d.right_value, d.depth, d.leader_id, d.status, d.created_on, d.updated_on FROM org_department AS d LEFT JOIN org_position AS p ON p.department_id = d.id AND p.status = 1000 LEFT JOIN org_employee_position AS ep ON ep.position_id = p.id AND ep.status = 1000 WHERE (d.ctx = 1 AND (ep.user_id = 62 OR d.id = 20 OR d.id = 20) AND d.status = 1000) GROUP BY d.id ORDER BY d.left_value;

create table test.Tab_A (id int primary key,bid int,cid int,name varchar(20),type varchar(20),num int,amt decimal(11,2));
create table test.Tab_B (id int primary key,name varchar(20));
create table test.Tab_C (id int primary key,name varchar(20),amt decimal(11,2));

insert into test.Tab_A values(2,2,2,'A01','01',112,111);
insert into test.Tab_A values(4,4,4,'A02','02',112,111);
insert into test.Tab_B values(2,'B01');
insert into test.Tab_B values(4,'B02');
insert into test.Tab_C values(2,'C01',22);
insert into test.Tab_C values(4,'C01',5);

explain select Tab_A.name AAA,Tab_B.name BBB,Tab_A.amt Aamt, Tab_C.amt Bamt,IFNULL(Tab_C.amt, 0) FROM Tab_A left join Tab_B on Tab_A.bid=Tab_B.id left join Tab_C on Tab_A.cid=Tab_C.id and Tab_A.type='01' where Tab_A.num=112;

select Tab_A.name AAA,Tab_B.name BBB,Tab_A.amt Aamt, Tab_C.amt Bamt,IFNULL(Tab_C.amt, 0) FROM Tab_A left join Tab_B on Tab_A.bid=Tab_B.id left join Tab_C on Tab_A.cid=Tab_C.id and Tab_A.type='01' where Tab_A.num=112;

78 changes: 61 additions & 17 deletions planner/core/rule_join_reorder.go
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ import (
// For example: "InnerJoin(InnerJoin(a, b), LeftJoin(c, d))"
// results in a join group {a, b, c, d}.
func extractJoinGroup(p LogicalPlan) (group []LogicalPlan, eqEdges []*expression.ScalarFunction,
otherConds []expression.Expression, joinTypes []JoinType, hintInfo []*tableHintInfo, hasOuterJoin bool) {
otherConds []expression.Expression, joinTypes []*joinTypeWithExtMsg, hintInfo []*tableHintInfo, hasOuterJoin bool) {
join, isJoin := p.(*LogicalJoin)
if isJoin && join.preferJoinOrder {
// When there is a leading hint, the hint may not take effect for other reasons.
Expand Down Expand Up @@ -129,12 +129,26 @@ func extractJoinGroup(p LogicalPlan) (group []LogicalPlan, eqEdges []*expression
}

eqEdges = append(eqEdges, join.EqualConditions...)
otherConds = append(otherConds, join.OtherConditions...)
otherConds = append(otherConds, join.LeftConditions...)
otherConds = append(otherConds, join.RightConditions...)
for range join.EqualConditions {
joinTypes = append(joinTypes, join.JoinType)
tmpOtherConds := make(expression.CNFExprs, 0, len(join.OtherConditions)+len(join.LeftConditions)+len(join.RightConditions))
tmpOtherConds = append(tmpOtherConds, join.OtherConditions...)
tmpOtherConds = append(tmpOtherConds, join.LeftConditions...)
tmpOtherConds = append(tmpOtherConds, join.RightConditions...)
if join.JoinType == LeftOuterJoin || join.JoinType == RightOuterJoin {
for range join.EqualConditions {
abType := &joinTypeWithExtMsg{JoinType: join.JoinType}
// outer join's other condition should be bound with the connecting edge.
// although we bind the outer condition to **anyone** of the join type, it will be extracted **only once** when make a new join.
abType.outerBindCondition = tmpOtherConds
joinTypes = append(joinTypes, abType)
}
} else {
for range join.EqualConditions {
abType := &joinTypeWithExtMsg{JoinType: join.JoinType}
joinTypes = append(joinTypes, abType)
}
otherConds = append(otherConds, tmpOtherConds...)
}

return group, eqEdges, otherConds, joinTypes, hintInfo, hasOuterJoin
}

Expand All @@ -146,6 +160,11 @@ type jrNode struct {
cumCost float64
}

type joinTypeWithExtMsg struct {
JoinType
outerBindCondition []expression.Expression
}

func (s *joinReOrderSolver) optimize(_ context.Context, p LogicalPlan, opt *logicalOptimizeOp) (LogicalPlan, error) {
tracer := &joinReorderTrace{cost: map[string]float64{}, opt: opt}
tracer.traceJoinReorder(p)
Expand All @@ -172,7 +191,7 @@ func (s *joinReOrderSolver) optimizeRecursive(ctx sessionctx.Context, p LogicalP
// Not support outer join reorder when using the DP algorithm
isSupportDP := true
for _, joinType := range joinTypes {
if joinType != InnerJoin {
if joinType.JoinType != InnerJoin {
isSupportDP = false
break
}
Expand Down Expand Up @@ -290,7 +309,7 @@ type baseSingleGroupJoinOrderSolver struct {
curJoinGroup []*jrNode
otherConds []expression.Expression
eqEdges []*expression.ScalarFunction
joinTypes []JoinType
joinTypes []*joinTypeWithExtMsg
leadingJoinGroup LogicalPlan
}

Expand Down Expand Up @@ -318,7 +337,7 @@ func (s *baseSingleGroupJoinOrderSolver) generateLeadingJoinGroup(curJoinGroup [
leadingJoinGroup = leadingJoinGroup[1:]
for len(leadingJoinGroup) > 0 {
var usedEdges []*expression.ScalarFunction
var joinType JoinType
var joinType *joinTypeWithExtMsg
leadingJoin, leadingJoinGroup[0], usedEdges, joinType = s.checkConnection(leadingJoin, leadingJoinGroup[0])
if hasOuterJoin && usedEdges == nil {
// If the joinGroups contain the outer join, we disable the cartesian product.
Expand Down Expand Up @@ -359,8 +378,8 @@ func (s *baseSingleGroupJoinOrderSolver) baseNodeCumCost(groupNode LogicalPlan)
}

// checkConnection used to check whether two nodes have equal conditions or not.
func (s *baseSingleGroupJoinOrderSolver) checkConnection(leftPlan, rightPlan LogicalPlan) (leftNode, rightNode LogicalPlan, usedEdges []*expression.ScalarFunction, joinType JoinType) {
joinType = InnerJoin
func (s *baseSingleGroupJoinOrderSolver) checkConnection(leftPlan, rightPlan LogicalPlan) (leftNode, rightNode LogicalPlan, usedEdges []*expression.ScalarFunction, joinType *joinTypeWithExtMsg) {
joinType = &joinTypeWithExtMsg{JoinType: InnerJoin}
leftNode, rightNode = leftPlan, rightPlan
for idx, edge := range s.eqEdges {
lCol := edge.GetArgs()[0].(*expression.Column)
Expand All @@ -370,7 +389,7 @@ func (s *baseSingleGroupJoinOrderSolver) checkConnection(leftPlan, rightPlan Log
usedEdges = append(usedEdges, edge)
} else if rightPlan.Schema().Contains(lCol) && leftPlan.Schema().Contains(rCol) {
joinType = s.joinTypes[idx]
if joinType != InnerJoin {
if joinType.JoinType != InnerJoin {
rightNode, leftNode = leftPlan, rightPlan
usedEdges = append(usedEdges, edge)
} else {
Expand All @@ -383,12 +402,19 @@ func (s *baseSingleGroupJoinOrderSolver) checkConnection(leftPlan, rightPlan Log
}

// makeJoin build join tree for the nodes which have equal conditions to connect them.
func (s *baseSingleGroupJoinOrderSolver) makeJoin(leftPlan, rightPlan LogicalPlan, eqEdges []*expression.ScalarFunction, joinType JoinType) (LogicalPlan, []expression.Expression) {
func (s *baseSingleGroupJoinOrderSolver) makeJoin(leftPlan, rightPlan LogicalPlan, eqEdges []*expression.ScalarFunction, joinType *joinTypeWithExtMsg) (LogicalPlan, []expression.Expression) {
remainOtherConds := make([]expression.Expression, len(s.otherConds))
copy(remainOtherConds, s.otherConds)
var otherConds []expression.Expression
var leftConds []expression.Expression
var rightConds []expression.Expression
var (
otherConds []expression.Expression
leftConds []expression.Expression
rightConds []expression.Expression

// for outer bind conditions
obOtherConds []expression.Expression
obLeftConds []expression.Expression
obRightConds []expression.Expression
)
mergedSchema := expression.MergeSchema(leftPlan.Schema(), rightPlan.Schema())

remainOtherConds, leftConds = expression.FilterOutInPlace(remainOtherConds, func(expr expression.Expression) bool {
Expand All @@ -400,7 +426,25 @@ func (s *baseSingleGroupJoinOrderSolver) makeJoin(leftPlan, rightPlan LogicalPla
remainOtherConds, otherConds = expression.FilterOutInPlace(remainOtherConds, func(expr expression.Expression) bool {
return expression.ExprFromSchema(expr, mergedSchema)
})
return s.newJoinWithEdges(leftPlan, rightPlan, eqEdges, otherConds, leftConds, rightConds, joinType), remainOtherConds
if len(joinType.outerBindCondition) > 0 {
remainOBOtherConds := make([]expression.Expression, len(joinType.outerBindCondition))
copy(remainOBOtherConds, joinType.outerBindCondition)
remainOBOtherConds, obLeftConds = expression.FilterOutInPlace(remainOBOtherConds, func(expr expression.Expression) bool {
return expression.ExprFromSchema(expr, leftPlan.Schema()) && !expression.ExprFromSchema(expr, rightPlan.Schema())
})
remainOBOtherConds, obRightConds = expression.FilterOutInPlace(remainOBOtherConds, func(expr expression.Expression) bool {
return expression.ExprFromSchema(expr, rightPlan.Schema()) && !expression.ExprFromSchema(expr, leftPlan.Schema())
})
// _ here make the linter happy.
_, obOtherConds = expression.FilterOutInPlace(remainOBOtherConds, func(expr expression.Expression) bool {
return expression.ExprFromSchema(expr, mergedSchema)
})
// case like: (A * B) left outer join C on (A.a = C.a && B.b > 0) will remain B.b > 0 in remainOBOtherConds (while this case
// has been forbidden by: filters of the outer join is related with multiple leaves of the outer join side in #34603)
// so noway here we got remainOBOtherConds remained.
}
return s.newJoinWithEdges(leftPlan, rightPlan, eqEdges,
append(otherConds, obOtherConds...), append(leftConds, obLeftConds...), append(rightConds, obRightConds...), joinType.JoinType), remainOtherConds
}

// makeBushyJoin build bushy tree for the nodes which have no equal condition to connect them.
Expand Down

0 comments on commit 0e4af6c

Please sign in to comment.