Skip to content

Commit

Permalink
planner: fix constant propagation for AntiSemiJoin (#12728)
Browse files Browse the repository at this point in the history
Conflicts:
	cmd/explaintest/r/tpch.result
  • Loading branch information
eurekaka committed Oct 18, 2019
1 parent 7749430 commit 44083a7
Show file tree
Hide file tree
Showing 5 changed files with 78 additions and 18 deletions.
2 changes: 1 addition & 1 deletion cmd/explaintest/r/tpch.result
Original file line number Diff line number Diff line change
Expand Up @@ -1223,7 +1223,7 @@ id count task operator info
Projection_25 1.00 root tpch.supplier.s_name, 17_col_0
└─TopN_28 1.00 root 17_col_0:desc, tpch.supplier.s_name:asc, offset:0, count:100
└─HashAgg_34 1.00 root group by:tpch.supplier.s_name, funcs:count(1), firstrow(tpch.supplier.s_name)
└─IndexJoin_40 7828961.66 root anti semi join, inner:IndexLookUp_39, outer key:tpch.l1.l_orderkey, inner key:tpch.l3.l_orderkey, other cond:ne(tpch.l3.l_suppkey, tpch.l1.l_suppkey), ne(tpch.l3.l_suppkey, tpch.supplier.s_suppkey)
└─IndexJoin_40 7828961.66 root anti semi join, inner:IndexLookUp_39, outer key:tpch.l1.l_orderkey, inner key:tpch.l3.l_orderkey, other cond:ne(tpch.l3.l_suppkey, tpch.l1.l_suppkey)
├─IndexJoin_56 9786202.08 root semi join, inner:IndexLookUp_55, outer key:tpch.l1.l_orderkey, inner key:tpch.l2.l_orderkey, other cond:ne(tpch.l2.l_suppkey, tpch.l1.l_suppkey), ne(tpch.l2.l_suppkey, tpch.supplier.s_suppkey)
│ ├─IndexJoin_62 12232752.60 root inner join, inner:TableReader_61, outer key:tpch.l1.l_orderkey, inner key:tpch.orders.o_orderkey
│ │ ├─HashRightJoin_66 12232752.60 root inner join, inner:HashRightJoin_72, equal:[eq(tpch.supplier.s_suppkey, tpch.l1.l_suppkey)]
Expand Down
45 changes: 45 additions & 0 deletions planner/core/integration_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -144,3 +144,48 @@ func (s *testIntegrationSuite) TestSimplifyOuterJoinWithCast(c *C) {
tk.MustQuery(tt).Check(testkit.Rows(output[i].Plan...))
}
}

func (s *testIntegrationSuite) TestAntiJoinConstProp(c *C) {
store, dom, err := newStoreWithBootstrap()
c.Assert(err, IsNil)
tk := testkit.NewTestKit(c, store)
defer func() {
dom.Close()
store.Close()
}()
tk.MustExec("use test")
tk.MustExec("drop table if exists t1, t2")
tk.MustExec("create table t1(a int not null, b int not null)")
tk.MustExec("insert into t1 values (1,1)")
tk.MustExec("create table t2(a int not null, b int not null)")
tk.MustExec("insert into t2 values (2,2)")

tk.MustQuery("select * from t1 where t1.a not in (select a from t2 where t2.a = t1.a and t2.a > 1)").Check(testkit.Rows(
"1 1",
))
tk.MustQuery("select * from t1 where t1.a not in (select a from t2 where t2.b = t1.b and t2.a > 1)").Check(testkit.Rows(
"1 1",
))
tk.MustQuery("select * from t1 where t1.a not in (select a from t2 where t2.b = t1.b and t2.b > 1)").Check(testkit.Rows(
"1 1",
))
tk.MustQuery("select q.a in (select count(*) from t1 s where not exists (select 1 from t1 p where q.a > 1 and p.a = s.a)) from t1 q").Check(testkit.Rows(
"1",
))
tk.MustQuery("select q.a in (select not exists (select 1 from t1 p where q.a > 1 and p.a = s.a) from t1 s) from t1 q").Check(testkit.Rows(
"1",
))

tk.MustExec("drop table t1, t2")
tk.MustExec("create table t1(a int not null, b int)")
tk.MustExec("insert into t1 values (1,null)")
tk.MustExec("create table t2(a int not null, b int)")
tk.MustExec("insert into t2 values (2,2)")

tk.MustQuery("select * from t1 where t1.a not in (select a from t2 where t2.b > t1.b)").Check(testkit.Rows(
"1 <nil>",
))
tk.MustQuery("select * from t1 where t1.a not in (select a from t2 where t1.a = 2)").Check(testkit.Rows(
"1 <nil>",
))
}
16 changes: 8 additions & 8 deletions planner/core/logical_plan_builder.go
Original file line number Diff line number Diff line change
Expand Up @@ -210,9 +210,14 @@ func (p *LogicalJoin) pushDownConstExpr(expr expression.Expression, leftCond []e
} else {
leftCond = append(leftCond, expr)
}
case SemiJoin, AntiSemiJoin, InnerJoin:
case SemiJoin, InnerJoin:
leftCond = append(leftCond, expr)
rightCond = append(rightCond, expr)
case AntiSemiJoin:
if filterCond {
leftCond = append(leftCond, expr)
}
rightCond = append(rightCond, expr)
}
return leftCond, rightCond
}
Expand All @@ -239,18 +244,13 @@ func (p *LogicalJoin) extractOnCondition(conditions []expression.Expression, der
arg0, arg1 = arg1, arg0
}
if leftCol != nil && rightCol != nil {
// Do not derive `is not null` for anti join, since it may cause wrong results.
// For example:
// `select * from t t1 where t1.a not in (select b from t t2)` does not imply `t2.b is not null`,
// `select * from t t1 where t1.a not in (select a from t t2 where t1.b = t2.b` does not imply `t1.b is not null`,
// `select * from t t1 where not exists (select * from t t2 where t2.a = t1.a)` does not imply `t1.a is not null`,
if deriveLeft && p.JoinType != AntiSemiJoin {
if deriveLeft {
if isNullRejected(ctx, left.Schema(), expr) && !mysql.HasNotNullFlag(leftCol.RetType.Flag) {
notNullExpr := expression.BuildNotNullExpr(ctx, leftCol)
leftCond = append(leftCond, notNullExpr)
}
}
if deriveRight && p.JoinType != AntiSemiJoin {
if deriveRight {
if isNullRejected(ctx, right.Schema(), expr) && !mysql.HasNotNullFlag(rightCol.RetType.Flag) {
notNullExpr := expression.BuildNotNullExpr(ctx, rightCol)
rightCond = append(rightCond, notNullExpr)
Expand Down
2 changes: 1 addition & 1 deletion planner/core/logical_plan_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -445,7 +445,7 @@ func (s *testPlanSuite) TestAntiSemiJoinConstFalse(c *C) {
}{
{
sql: "select a from t t1 where not exists (select a from t t2 where t1.a = t2.a and t2.b = 1 and t2.b = 2)",
best: "Join{DataScan(t1)->DataScan(t2)}->Projection",
best: "Join{DataScan(t1)->DataScan(t2)}(test.t1.a,test.t2.a)->Projection",
joinType: "anti semi join",
},
}
Expand Down
31 changes: 23 additions & 8 deletions planner/core/rule_predicate_push_down.go
Original file line number Diff line number Diff line change
Expand Up @@ -149,7 +149,7 @@ func (p *LogicalJoin) PredicatePushDown(predicates []expression.Expression) (ret
p.LeftConditions = nil
ret = append(expression.ScalarFuncs2Exprs(equalCond), otherCond...)
ret = append(ret, leftPushCond...)
case SemiJoin, AntiSemiJoin, InnerJoin:
case SemiJoin, InnerJoin:
tempCond := make([]expression.Expression, 0, len(p.LeftConditions)+len(p.RightConditions)+len(p.EqualConditions)+len(p.OtherConditions)+len(predicates))
tempCond = append(tempCond, p.LeftConditions...)
tempCond = append(tempCond, p.RightConditions...)
Expand All @@ -158,13 +158,10 @@ func (p *LogicalJoin) PredicatePushDown(predicates []expression.Expression) (ret
tempCond = append(tempCond, predicates...)
tempCond = expression.ExtractFiltersFromDNFs(p.ctx, tempCond)
tempCond = expression.PropagateConstant(p.ctx, tempCond)
// Return table dual when filter is constant false or null. Not applicable to AntiSemiJoin.
// TODO: For AntiSemiJoin, we can use outer plan to substitute LogicalJoin actually.
if p.JoinType != AntiSemiJoin {
dual := conds2TableDual(p, tempCond)
if dual != nil {
return ret, dual
}
// Return table dual when filter is constant false or null.
dual := conds2TableDual(p, tempCond)
if dual != nil {
return ret, dual
}
equalCond, leftPushCond, rightPushCond, otherCond = p.extractOnCondition(tempCond, true, true)
p.LeftConditions = nil
Expand All @@ -173,6 +170,24 @@ func (p *LogicalJoin) PredicatePushDown(predicates []expression.Expression) (ret
p.OtherConditions = otherCond
leftCond = leftPushCond
rightCond = rightPushCond
case AntiSemiJoin:
predicates = expression.PropagateConstant(p.ctx, predicates)
// Return table dual when filter is constant false or null.
dual := conds2TableDual(p, predicates)
if dual != nil {
return ret, dual
}
// `predicates` should only contain left conditions or constant filters.
_, leftPushCond, rightPushCond, _ = p.extractOnCondition(predicates, true, true)
// Do not derive `is not null` for anti join, since it may cause wrong results.
// For example:
// `select * from t t1 where t1.a not in (select b from t t2)` does not imply `t2.b is not null`,
// `select * from t t1 where t1.a not in (select a from t t2 where t1.b = t2.b` does not imply `t1.b is not null`,
// `select * from t t1 where not exists (select * from t t2 where t2.a = t1.a)` does not imply `t1.a is not null`,
leftCond = leftPushCond
rightCond = append(p.RightConditions, rightPushCond...)
p.RightConditions = nil

}
leftCond = expression.RemoveDupExprs(p.ctx, leftCond)
rightCond = expression.RemoveDupExprs(p.ctx, rightCond)
Expand Down

0 comments on commit 44083a7

Please sign in to comment.