Skip to content

Commit

Permalink
statistics: trigger auto-analyze based on histogram row count
Browse files Browse the repository at this point in the history
  • Loading branch information
eurekaka committed May 8, 2021
1 parent 04da3ce commit 14cb4a7
Show file tree
Hide file tree
Showing 3 changed files with 60 additions and 2 deletions.
8 changes: 6 additions & 2 deletions statistics/handle/update.go
Original file line number Diff line number Diff line change
Expand Up @@ -875,10 +875,14 @@ func NeedAnalyzeTable(tbl *statistics.Table, limit time.Duration, autoAnalyzeRat
return false, ""
}
// No need to analyze it.
if float64(tbl.ModifyCount)/float64(tbl.Count) <= autoAnalyzeRatio {
tblCnt := float64(tbl.Count)
if histCnt := tbl.ColHistCount(); histCnt > 0 {
tblCnt = histCnt
}
if float64(tbl.ModifyCount)/tblCnt <= autoAnalyzeRatio {
return false, ""
}
return true, fmt.Sprintf("too many modifications(%v/%v>%v)", tbl.ModifyCount, tbl.Count, autoAnalyzeRatio)
return true, fmt.Sprintf("too many modifications(%v/%v>%v)", tbl.ModifyCount, tblCnt, autoAnalyzeRatio)
}

func (h *Handle) getAutoAnalyzeParameters() map[string]string {
Expand Down
44 changes: 44 additions & 0 deletions statistics/handle/update_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -2257,3 +2257,47 @@ func (s *testSerialStatsSuite) TestAutoUpdatePartitionInDynamicOnlyMode(c *C) {
c.Assert(partitionStats.ModifyCount, Equals, int64(0))
})
}

// TestAutoAnalyzeRatio exercises HandleAutoAnalyze after successive batches of
// modifications to a table that has already been analyzed, and asserts for
// each batch whether an auto-analyze is triggered.
func (s *testSerialStatsSuite) TestAutoAnalyzeRatio(c *C) {
	defer cleanEnv(c, s.store, s.do)
	tk := testkit.NewTestKit(c, s.store)

	// Remember every global knob this test touches so the deferred cleanup
	// restores exactly what was there before, rather than assumed defaults.
	oriStart := tk.MustQuery("select @@tidb_auto_analyze_start_time").Rows()[0][0].(string)
	oriEnd := tk.MustQuery("select @@tidb_auto_analyze_end_time").Rows()[0][0].(string)
	oriMinCnt := handle.AutoAnalyzeMinCnt
	handle.AutoAnalyzeMinCnt = 0
	defer func() {
		handle.AutoAnalyzeMinCnt = oriMinCnt
		tk.MustExec(fmt.Sprintf("set global tidb_auto_analyze_start_time='%v'", oriStart))
		tk.MustExec(fmt.Sprintf("set global tidb_auto_analyze_end_time='%v'", oriEnd))
	}()

	h := s.do.StatsHandle()
	tk.MustExec("use test")
	tk.MustExec("create table t (a int)")
	c.Assert(h.HandleDDLEvent(<-h.DDLEventCh()), IsNil)
	// Seed the table with 20 rows.
	tk.MustExec("insert into t values (1)" + strings.Repeat(", (1)", 19))
	c.Assert(h.DumpStatsDeltaToKV(handle.DumpAll), IsNil)
	is := s.do.InfoSchema()
	c.Assert(h.Update(is), IsNil)
	// To pass the stats.Pseudo check in autoAnalyzeTable
	tk.MustExec("analyze table t")
	tk.MustExec("explain select * from t where a = 1")
	c.Assert(h.LoadNeededHistograms(), IsNil)
	// Open the auto-analyze window to (almost) the whole day.
	tk.MustExec("set global tidb_auto_analyze_start_time='00:00 +0000'")
	tk.MustExec("set global tidb_auto_analyze_end_time='23:59 +0000'")

	// 11 rows inserted on top of the 20 analyzed rows: auto-analyze must fire.
	tk.MustExec("insert into t values (1)" + strings.Repeat(", (1)", 10))
	c.Assert(h.DumpStatsDeltaToKV(handle.DumpAll), IsNil)
	c.Assert(h.Update(is), IsNil)
	c.Assert(h.HandleAutoAnalyze(is), IsTrue)

	// 12 rows deleted after the previous analyze: auto-analyze must not fire.
	// NOTE(review): presumably 12 modifications against the 31 rows recorded by
	// the last analyze stay under the configured ratio — confirm the default.
	tk.MustExec("delete from t limit 12")
	c.Assert(h.DumpStatsDeltaToKV(handle.DumpAll), IsNil)
	c.Assert(h.Update(is), IsNil)
	c.Assert(h.HandleAutoAnalyze(is), IsFalse)

	// 4 more rows deleted (16 modifications in total since the last analyze):
	// auto-analyze must fire again.
	tk.MustExec("delete from t limit 4")
	c.Assert(h.DumpStatsDeltaToKV(handle.DumpAll), IsNil)
	c.Assert(h.Update(is), IsNil)
	c.Assert(h.HandleAutoAnalyze(is), IsTrue)
}
10 changes: 10 additions & 0 deletions statistics/table.go
Original file line number Diff line number Diff line change
Expand Up @@ -214,6 +214,16 @@ func (t *Table) GetStatsInfo(ID int64, isIndex bool) (int64, *Histogram, *CMSket
return int64(colStatsInfo.TotalRowCount()), colStatsInfo.Histogram.Copy(), colStatsInfo.CMSketch.Copy(), colStatsInfo.TopN.Copy(), colStatsInfo.FMSketch.Copy()
}

// ColHistCount returns the total row count reported by a column of the table.
// It walks t.Columns and returns TotalRowCount() of the first non-nil entry it
// meets. When t.Columns holds no non-nil entry it returns -1, so callers can
// tell "no column statistics" apart from a real count.
// NOTE(review): if t.Columns is a map, which column gets picked depends on
// iteration order — confirm every column reports the same total.
func (t *Table) ColHistCount() float64 {
	for _, c := range t.Columns {
		if c == nil {
			continue
		}
		return c.TotalRowCount()
	}
	return -1
}

type tableColumnID struct {
TableID int64
ColumnID int64
Expand Down

0 comments on commit 14cb4a7

Please sign in to comment.