Skip to content

Commit

Permalink
Merge branch 'release-3.0' into automated-cherry-pick-of-pingcap#10837-…
Browse files Browse the repository at this point in the history
…upstream-release-3.0
  • Loading branch information
eurekaka authored Oct 18, 2019
2 parents 91201ed + c6bbf0f commit d94af9a
Show file tree
Hide file tree
Showing 9 changed files with 96 additions and 26 deletions.
2 changes: 1 addition & 1 deletion cmd/explaintest/r/tpch.result
Original file line number Diff line number Diff line change
Expand Up @@ -1223,7 +1223,7 @@ id count task operator info
Projection_25 1.00 root tpch.supplier.s_name, 17_col_0
└─TopN_28 1.00 root 17_col_0:desc, tpch.supplier.s_name:asc, offset:0, count:100
└─HashAgg_34 1.00 root group by:tpch.supplier.s_name, funcs:count(1), firstrow(tpch.supplier.s_name)
└─IndexJoin_40 7828961.66 root anti semi join, inner:IndexLookUp_39, outer key:tpch.l1.l_orderkey, inner key:tpch.l3.l_orderkey, other cond:ne(tpch.l3.l_suppkey, tpch.l1.l_suppkey), ne(tpch.l3.l_suppkey, tpch.supplier.s_suppkey)
└─IndexJoin_40 7828961.66 root anti semi join, inner:IndexLookUp_39, outer key:tpch.l1.l_orderkey, inner key:tpch.l3.l_orderkey, other cond:ne(tpch.l3.l_suppkey, tpch.l1.l_suppkey)
├─IndexJoin_56 9786202.08 root semi join, inner:IndexLookUp_55, outer key:tpch.l1.l_orderkey, inner key:tpch.l2.l_orderkey, other cond:ne(tpch.l2.l_suppkey, tpch.l1.l_suppkey), ne(tpch.l2.l_suppkey, tpch.supplier.s_suppkey)
│ ├─IndexJoin_62 12232752.60 root inner join, inner:TableReader_61, outer key:tpch.l1.l_orderkey, inner key:tpch.orders.o_orderkey
│ │ ├─HashRightJoin_66 12232752.60 root inner join, inner:HashRightJoin_72, equal:[eq(tpch.supplier.s_suppkey, tpch.l1.l_suppkey)]
Expand Down
45 changes: 45 additions & 0 deletions planner/core/integration_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -144,3 +144,48 @@ func (s *testIntegrationSuite) TestSimplifyOuterJoinWithCast(c *C) {
tk.MustQuery(tt).Check(testkit.Rows(output[i].Plan...))
}
}

// TestAntiJoinConstProp runs NOT IN / NOT EXISTS subqueries whose conditions
// mix constant filters with correlated columns, and checks the returned rows.
// It is exercised twice: first with NOT NULL columns only, then with a
// nullable column `b` that holds a NULL on the outer side.
func (s *testIntegrationSuite) TestAntiJoinConstProp(c *C) {
	store, dom, err := newStoreWithBootstrap()
	c.Assert(err, IsNil)
	tk := testkit.NewTestKit(c, store)
	defer func() {
		dom.Close()
		store.Close()
	}()

	// execAll runs each statement in order and fails the test on any error.
	execAll := func(stmts ...string) {
		for _, stmt := range stmts {
			tk.MustExec(stmt)
		}
	}
	// expectRow runs each query in order and checks that it returns exactly
	// the single row `want`.
	expectRow := func(want string, queries ...string) {
		for _, query := range queries {
			tk.MustQuery(query).Check(testkit.Rows(want))
		}
	}

	// Round 1: every column is NOT NULL; t1 holds (1,1) and t2 holds (2,2).
	execAll(
		"use test",
		"drop table if exists t1, t2",
		"create table t1(a int not null, b int not null)",
		"insert into t1 values (1,1)",
		"create table t2(a int not null, b int not null)",
		"insert into t2 values (2,2)",
	)
	expectRow("1 1",
		"select * from t1 where t1.a not in (select a from t2 where t2.a = t1.a and t2.a > 1)",
		"select * from t1 where t1.a not in (select a from t2 where t2.b = t1.b and t2.a > 1)",
		"select * from t1 where t1.a not in (select a from t2 where t2.b = t1.b and t2.b > 1)",
	)
	expectRow("1",
		"select q.a in (select count(*) from t1 s where not exists (select 1 from t1 p where q.a > 1 and p.a = s.a)) from t1 q",
		"select q.a in (select not exists (select 1 from t1 p where q.a > 1 and p.a = s.a) from t1 s) from t1 q",
	)

	// Round 2: column b is nullable and the outer row carries b = NULL.
	execAll(
		"drop table t1, t2",
		"create table t1(a int not null, b int)",
		"insert into t1 values (1,null)",
		"create table t2(a int not null, b int)",
		"insert into t2 values (2,2)",
	)
	expectRow("1 <nil>",
		"select * from t1 where t1.a not in (select a from t2 where t2.b > t1.b)",
		"select * from t1 where t1.a not in (select a from t2 where t1.a = 2)",
	)
}
16 changes: 8 additions & 8 deletions planner/core/logical_plan_builder.go
Original file line number Diff line number Diff line change
Expand Up @@ -211,9 +211,14 @@ func (p *LogicalJoin) pushDownConstExpr(expr expression.Expression, leftCond []e
} else {
leftCond = append(leftCond, expr)
}
case SemiJoin, AntiSemiJoin, InnerJoin:
case SemiJoin, InnerJoin:
leftCond = append(leftCond, expr)
rightCond = append(rightCond, expr)
case AntiSemiJoin:
if filterCond {
leftCond = append(leftCond, expr)
}
rightCond = append(rightCond, expr)
}
return leftCond, rightCond
}
Expand All @@ -240,18 +245,13 @@ func (p *LogicalJoin) extractOnCondition(conditions []expression.Expression, der
arg0, arg1 = arg1, arg0
}
if leftCol != nil && rightCol != nil {
// Do not derive `is not null` for anti join, since it may cause wrong results.
// For example:
// `select * from t t1 where t1.a not in (select b from t t2)` does not imply `t2.b is not null`,
// `select * from t t1 where t1.a not in (select a from t t2 where t1.b = t2.b)` does not imply `t1.b is not null`,
// `select * from t t1 where not exists (select * from t t2 where t2.a = t1.a)` does not imply `t1.a is not null`,
if deriveLeft && p.JoinType != AntiSemiJoin {
if deriveLeft {
if isNullRejected(ctx, left.Schema(), expr) && !mysql.HasNotNullFlag(leftCol.RetType.Flag) {
notNullExpr := expression.BuildNotNullExpr(ctx, leftCol)
leftCond = append(leftCond, notNullExpr)
}
}
if deriveRight && p.JoinType != AntiSemiJoin {
if deriveRight {
if isNullRejected(ctx, right.Schema(), expr) && !mysql.HasNotNullFlag(rightCol.RetType.Flag) {
notNullExpr := expression.BuildNotNullExpr(ctx, rightCol)
rightCond = append(rightCond, notNullExpr)
Expand Down
2 changes: 1 addition & 1 deletion planner/core/logical_plan_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -445,7 +445,7 @@ func (s *testPlanSuite) TestAntiSemiJoinConstFalse(c *C) {
}{
{
sql: "select a from t t1 where not exists (select a from t t2 where t1.a = t2.a and t2.b = 1 and t2.b = 2)",
best: "Join{DataScan(t1)->DataScan(t2)}->Projection",
best: "Join{DataScan(t1)->DataScan(t2)}(test.t1.a,test.t2.a)->Projection",
joinType: "anti semi join",
},
}
Expand Down
31 changes: 23 additions & 8 deletions planner/core/rule_predicate_push_down.go
Original file line number Diff line number Diff line change
Expand Up @@ -149,7 +149,7 @@ func (p *LogicalJoin) PredicatePushDown(predicates []expression.Expression) (ret
p.LeftConditions = nil
ret = append(expression.ScalarFuncs2Exprs(equalCond), otherCond...)
ret = append(ret, leftPushCond...)
case SemiJoin, AntiSemiJoin, InnerJoin:
case SemiJoin, InnerJoin:
tempCond := make([]expression.Expression, 0, len(p.LeftConditions)+len(p.RightConditions)+len(p.EqualConditions)+len(p.OtherConditions)+len(predicates))
tempCond = append(tempCond, p.LeftConditions...)
tempCond = append(tempCond, p.RightConditions...)
Expand All @@ -158,13 +158,10 @@ func (p *LogicalJoin) PredicatePushDown(predicates []expression.Expression) (ret
tempCond = append(tempCond, predicates...)
tempCond = expression.ExtractFiltersFromDNFs(p.ctx, tempCond)
tempCond = expression.PropagateConstant(p.ctx, tempCond)
// Return table dual when filter is constant false or null. Not applicable to AntiSemiJoin.
// TODO: For AntiSemiJoin, we can use outer plan to substitute LogicalJoin actually.
if p.JoinType != AntiSemiJoin {
dual := conds2TableDual(p, tempCond)
if dual != nil {
return ret, dual
}
// Return table dual when filter is constant false or null.
dual := conds2TableDual(p, tempCond)
if dual != nil {
return ret, dual
}
equalCond, leftPushCond, rightPushCond, otherCond = p.extractOnCondition(tempCond, true, true)
p.LeftConditions = nil
Expand All @@ -173,6 +170,24 @@ func (p *LogicalJoin) PredicatePushDown(predicates []expression.Expression) (ret
p.OtherConditions = otherCond
leftCond = leftPushCond
rightCond = rightPushCond
case AntiSemiJoin:
predicates = expression.PropagateConstant(p.ctx, predicates)
// Return table dual when filter is constant false or null.
dual := conds2TableDual(p, predicates)
if dual != nil {
return ret, dual
}
// `predicates` should only contain left conditions or constant filters.
_, leftPushCond, rightPushCond, _ = p.extractOnCondition(predicates, true, true)
// Do not derive `is not null` for anti join, since it may cause wrong results.
// For example:
// `select * from t t1 where t1.a not in (select b from t t2)` does not imply `t2.b is not null`,
// `select * from t t1 where t1.a not in (select a from t t2 where t1.b = t2.b)` does not imply `t1.b is not null`,
// `select * from t t1 where not exists (select * from t t2 where t2.a = t1.a)` does not imply `t1.a is not null`,
leftCond = leftPushCond
rightCond = append(p.RightConditions, rightPushCond...)
p.RightConditions = nil

}
leftCond = expression.RemoveDupExprs(p.ctx, leftCond)
rightCond = expression.RemoveDupExprs(p.ctx, rightCond)
Expand Down
4 changes: 3 additions & 1 deletion statistics/cmsketch.go
Original file line number Diff line number Diff line change
Expand Up @@ -445,7 +445,9 @@ func LoadCMSketchWithTopN(exec sqlexec.RestrictedSQLExecutor, tableID, isIndex,
}
topN := make([]*TopNMeta, 0, len(topNRows))
for _, row := range topNRows {
topN = append(topN, &TopNMeta{Data: row.GetBytes(0), Count: row.GetUint64(1)})
data := make([]byte, len(row.GetBytes(0)))
copy(data, row.GetBytes(0))
topN = append(topN, &TopNMeta{Data: data, Count: row.GetUint64(1)})
}
return decodeCMSketch(cms, topN)
}
Expand Down
5 changes: 3 additions & 2 deletions statistics/handle/bootstrap.go
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,7 @@ func (h *Handle) initStatsHistograms4Chunk(is infoschema.InfoSchema, tables Stat
continue
}
id, ndv, nullCount, version, totColSize := row.GetInt64(2), row.GetInt64(3), row.GetInt64(5), row.GetUint64(4), row.GetInt64(7)
lastAnalyzePos := row.GetDatum(11, types.NewFieldType(mysql.TypeBlob))
tbl, _ := h.getTableByPhysicalID(is, table.PhysicalID)
if row.GetInt64(1) > 0 {
var idxInfo *model.IndexInfo
Expand All @@ -109,7 +110,7 @@ func (h *Handle) initStatsHistograms4Chunk(is infoschema.InfoSchema, tables Stat
terror.Log(errors.Trace(err))
}
hist := statistics.NewHistogram(id, ndv, nullCount, version, types.NewFieldType(mysql.TypeBlob), chunk.InitialCapacity, 0)
table.Indices[hist.ID] = &statistics.Index{Histogram: *hist, CMSketch: cms, Info: idxInfo, StatsVer: row.GetInt64(8), Flag: row.GetInt64(10), LastAnalyzePos: row.GetDatum(11, types.NewFieldType(mysql.TypeBlob))}
table.Indices[hist.ID] = &statistics.Index{Histogram: *hist, CMSketch: cms, Info: idxInfo, StatsVer: row.GetInt64(8), Flag: row.GetInt64(10), LastAnalyzePos: *lastAnalyzePos.Copy()}
} else {
var colInfo *model.ColumnInfo
for _, col := range tbl.Meta().Columns {
Expand All @@ -130,7 +131,7 @@ func (h *Handle) initStatsHistograms4Chunk(is infoschema.InfoSchema, tables Stat
Count: nullCount,
IsHandle: tbl.Meta().PKIsHandle && mysql.HasPriKeyFlag(colInfo.Flag),
Flag: row.GetInt64(10),
LastAnalyzePos: row.GetDatum(11, types.NewFieldType(mysql.TypeBlob)),
LastAnalyzePos: *lastAnalyzePos.Copy(),
}
}
}
Expand Down
11 changes: 7 additions & 4 deletions statistics/handle/handle.go
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,8 @@ func (h *Handle) Clear() {
}
h.feedback = h.feedback[:0]
h.mu.ctx.GetSessionVars().InitChunkSize = 1
h.mu.ctx.GetSessionVars().MaxChunkSize = 32
h.mu.ctx.GetSessionVars().MaxChunkSize = 1
h.mu.ctx.GetSessionVars().ProjectionConcurrency = 0
h.listHead = &SessionStatsCollector{mapper: make(tableDeltaMap), rateMap: make(errorRateDeltaMap)}
h.globalMap = make(tableDeltaMap)
h.mu.rateMap = make(errorRateDeltaMap)
Expand Down Expand Up @@ -353,6 +354,7 @@ func (h *Handle) indexStatsFromStorage(row chunk.Row, table *statistics.Table, t
idx := table.Indices[histID]
errorRate := statistics.ErrorRate{}
flag := row.GetInt64(8)
lastAnalyzePos := row.GetDatum(10, types.NewFieldType(mysql.TypeBlob))
if statistics.IsAnalyzed(flag) {
h.mu.Lock()
h.mu.rateMap.clear(table.PhysicalID, histID, true)
Expand All @@ -373,7 +375,7 @@ func (h *Handle) indexStatsFromStorage(row chunk.Row, table *statistics.Table, t
if err != nil {
return errors.Trace(err)
}
idx = &statistics.Index{Histogram: *hg, CMSketch: cms, Info: idxInfo, ErrorRate: errorRate, StatsVer: row.GetInt64(7), Flag: flag, LastAnalyzePos: row.GetDatum(10, types.NewFieldType(mysql.TypeBlob))}
idx = &statistics.Index{Histogram: *hg, CMSketch: cms, Info: idxInfo, ErrorRate: errorRate, StatsVer: row.GetInt64(7), Flag: flag, LastAnalyzePos: *lastAnalyzePos.Copy()}
}
break
}
Expand All @@ -392,6 +394,7 @@ func (h *Handle) columnStatsFromStorage(row chunk.Row, table *statistics.Table,
nullCount := row.GetInt64(5)
totColSize := row.GetInt64(6)
correlation := row.GetFloat64(9)
lastAnalyzePos := row.GetDatum(10, types.NewFieldType(mysql.TypeBlob))
col := table.Columns[histID]
errorRate := statistics.ErrorRate{}
flag := row.GetInt64(8)
Expand Down Expand Up @@ -429,7 +432,7 @@ func (h *Handle) columnStatsFromStorage(row chunk.Row, table *statistics.Table,
ErrorRate: errorRate,
IsHandle: tableInfo.PKIsHandle && mysql.HasPriKeyFlag(colInfo.Flag),
Flag: flag,
LastAnalyzePos: row.GetDatum(10, types.NewFieldType(mysql.TypeBlob)),
LastAnalyzePos: *lastAnalyzePos.Copy(),
}
col.Histogram.Correlation = correlation
break
Expand All @@ -452,7 +455,7 @@ func (h *Handle) columnStatsFromStorage(row chunk.Row, table *statistics.Table,
ErrorRate: errorRate,
IsHandle: tableInfo.PKIsHandle && mysql.HasPriKeyFlag(colInfo.Flag),
Flag: flag,
LastAnalyzePos: row.GetDatum(10, types.NewFieldType(mysql.TypeBlob)),
LastAnalyzePos: *lastAnalyzePos.Copy(),
}
break
}
Expand Down
6 changes: 5 additions & 1 deletion statistics/handle/handle_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -371,7 +371,7 @@ func (s *testStatsSuite) TestInitStats(c *C) {
testKit := testkit.NewTestKit(c, s.store)
testKit.MustExec("use test")
testKit.MustExec("create table t(a int, b int, c int, primary key(a), key idx(b))")
testKit.MustExec("insert into t values (1,1,1),(2,2,2),(3,3,3),(4,4,4),(5,5,5),(6,6,6)")
testKit.MustExec("insert into t values (1,1,1),(2,2,2),(3,3,3),(4,4,4),(5,5,5),(6,7,8)")
testKit.MustExec("analyze table t")
h := s.do.StatsHandle()
is := s.do.InfoSchema()
Expand All @@ -384,6 +384,10 @@ func (s *testStatsSuite) TestInitStats(c *C) {
h.Clear()
c.Assert(h.InitStats(is), IsNil)
table0 := h.GetTableStats(tbl.Meta())
cols := table0.Columns
c.Assert(cols[1].LastAnalyzePos.GetBytes()[0], Equals, uint8(0x36))
c.Assert(cols[2].LastAnalyzePos.GetBytes()[0], Equals, uint8(0x37))
c.Assert(cols[3].LastAnalyzePos.GetBytes()[0], Equals, uint8(0x38))
h.Clear()
c.Assert(h.Update(is), IsNil)
table1 := h.GetTableStats(tbl.Meta())
Expand Down

0 comments on commit d94af9a

Please sign in to comment.