From 14cb4a7050bfe4e52a8d4dc6e05d3e9f1a8a986b Mon Sep 17 00:00:00 2001 From: Kenan Yao Date: Thu, 29 Apr 2021 18:20:17 +0800 Subject: [PATCH 1/3] statistics: trigger auto-analyze based on histogram row count --- statistics/handle/update.go | 8 ++++-- statistics/handle/update_test.go | 44 ++++++++++++++++++++++++++++++++ statistics/table.go | 10 ++++++++ 3 files changed, 60 insertions(+), 2 deletions(-) diff --git a/statistics/handle/update.go b/statistics/handle/update.go index c65f0885877f6..68fbe3be761cc 100644 --- a/statistics/handle/update.go +++ b/statistics/handle/update.go @@ -875,10 +875,14 @@ func NeedAnalyzeTable(tbl *statistics.Table, limit time.Duration, autoAnalyzeRat return false, "" } // No need to analyze it. - if float64(tbl.ModifyCount)/float64(tbl.Count) <= autoAnalyzeRatio { + tblCnt := float64(tbl.Count) + if histCnt := tbl.ColHistCount(); histCnt > 0 { + tblCnt = histCnt + } + if float64(tbl.ModifyCount)/tblCnt <= autoAnalyzeRatio { return false, "" } - return true, fmt.Sprintf("too many modifications(%v/%v>%v)", tbl.ModifyCount, tbl.Count, autoAnalyzeRatio) + return true, fmt.Sprintf("too many modifications(%v/%v>%v)", tbl.ModifyCount, tblCnt, autoAnalyzeRatio) } func (h *Handle) getAutoAnalyzeParameters() map[string]string { diff --git a/statistics/handle/update_test.go b/statistics/handle/update_test.go index b105738098f4b..754755dec8c83 100644 --- a/statistics/handle/update_test.go +++ b/statistics/handle/update_test.go @@ -2257,3 +2257,47 @@ func (s *testSerialStatsSuite) TestAutoUpdatePartitionInDynamicOnlyMode(c *C) { c.Assert(partitionStats.ModifyCount, Equals, int64(0)) }) } + +func (s *testSerialStatsSuite) TestAutoAnalyzeRatio(c *C) { + defer cleanEnv(c, s.store, s.do) + tk := testkit.NewTestKit(c, s.store) + + oriStart := tk.MustQuery("select @@tidb_auto_analyze_start_time").Rows()[0][0].(string) + oriEnd := tk.MustQuery("select @@tidb_auto_analyze_end_time").Rows()[0][0].(string) + handle.AutoAnalyzeMinCnt = 0 + defer func() { + handle.AutoAnalyzeMinCnt = 1000 + tk.MustExec(fmt.Sprintf("set global tidb_auto_analyze_start_time='%v'", oriStart)) + tk.MustExec(fmt.Sprintf("set global tidb_auto_analyze_end_time='%v'", oriEnd)) + }() + + h := s.do.StatsHandle() + tk.MustExec("use test") + tk.MustExec("create table t (a int)") + c.Assert(h.HandleDDLEvent(<-h.DDLEventCh()), IsNil) + tk.MustExec("insert into t values (1)" + strings.Repeat(", (1)", 19)) + c.Assert(h.DumpStatsDeltaToKV(handle.DumpAll), IsNil) + is := s.do.InfoSchema() + c.Assert(h.Update(is), IsNil) + // To pass the stats.Pseudo check in autoAnalyzeTable + tk.MustExec("analyze table t") + tk.MustExec("explain select * from t where a = 1") + c.Assert(h.LoadNeededHistograms(), IsNil) + tk.MustExec(fmt.Sprintf("set global tidb_auto_analyze_start_time='00:00 +0000'")) + tk.MustExec(fmt.Sprintf("set global tidb_auto_analyze_end_time='23:59 +0000'")) + + tk.MustExec("insert into t values (1)" + strings.Repeat(", (1)", 10)) + c.Assert(h.DumpStatsDeltaToKV(handle.DumpAll), IsNil) + c.Assert(h.Update(is), IsNil) + c.Assert(h.HandleAutoAnalyze(is), IsTrue) + + tk.MustExec("delete from t limit 12") + c.Assert(h.DumpStatsDeltaToKV(handle.DumpAll), IsNil) + c.Assert(h.Update(is), IsNil) + c.Assert(h.HandleAutoAnalyze(is), IsFalse) + + tk.MustExec("delete from t limit 4") + c.Assert(s.do.StatsHandle().DumpStatsDeltaToKV(handle.DumpAll), IsNil) + c.Assert(h.Update(s.do.InfoSchema()), IsNil) + c.Assert(s.do.StatsHandle().HandleAutoAnalyze(s.do.InfoSchema()), IsTrue) +} diff --git a/statistics/table.go b/statistics/table.go index 7628e018e25a5..398e0f5b2e3f6 100644 --- a/statistics/table.go +++ b/statistics/table.go @@ -214,6 +214,16 @@ func (t *Table) GetStatsInfo(ID int64, isIndex bool) (int64, *Histogram, *CMSket return int64(colStatsInfo.TotalRowCount()), colStatsInfo.Histogram.Copy(), colStatsInfo.CMSketch.Copy(), colStatsInfo.TopN.Copy(), colStatsInfo.FMSketch.Copy() } +// ColHistCount returns the count of the column histograms. +func (t *Table) ColHistCount() float64 { + for _, col := range t.Columns { + if col != nil { + return col.TotalRowCount() + } + } + return -1 +} + type tableColumnID struct { TableID int64 ColumnID int64 From 631ac2e8a1c87d82df3f7057f4e5ff9977046afb Mon Sep 17 00:00:00 2001 From: Yuanjia Zhang Date: Wed, 28 Jul 2021 10:57:58 +0800 Subject: [PATCH 2/3] Update statistics/handle/update_test.go --- statistics/handle/update_test.go | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/statistics/handle/update_test.go b/statistics/handle/update_test.go index e25a04312c39c..eae598a2115b8 100644 --- a/statistics/handle/update_test.go +++ b/statistics/handle/update_test.go @@ -2324,7 +2324,7 @@ func (s *testSerialStatsSuite) TestAutoAnalyzeRatio(c *C) { c.Assert(h.HandleAutoAnalyze(is), IsFalse) tk.MustExec("delete from t limit 4") - c.Assert(s.do.StatsHandle().DumpStatsDeltaToKV(handle.DumpAll), IsNil) - c.Assert(h.Update(s.do.InfoSchema()), IsNil) - c.Assert(s.do.StatsHandle().HandleAutoAnalyze(s.do.InfoSchema()), IsTrue) + c.Assert(h.DumpStatsDeltaToKV(handle.DumpAll), IsNil) + c.Assert(h.Update(is), IsNil) + c.Assert(h.HandleAutoAnalyze(s.do.InfoSchema()), IsTrue) } From 666413591e796b414644153d1f569b65abc3d0cb Mon Sep 17 00:00:00 2001 From: Yuanjia Zhang Date: Wed, 28 Jul 2021 11:09:26 +0800 Subject: [PATCH 3/3] Update statistics/handle/update_test.go --- statistics/handle/update_test.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/statistics/handle/update_test.go b/statistics/handle/update_test.go index 4e2a65589654f..9b0438ab2f655 100644 --- a/statistics/handle/update_test.go +++ b/statistics/handle/update_test.go @@ -2310,8 +2310,8 @@ func (s *testSerialStatsSuite) TestAutoAnalyzeRatio(c *C) { tk.MustExec("analyze table t") tk.MustExec("explain select * from t where a = 1") c.Assert(h.LoadNeededHistograms(), IsNil) - tk.MustExec(fmt.Sprintf("set global tidb_auto_analyze_start_time='00:00 +0000'")) - tk.MustExec(fmt.Sprintf("set global tidb_auto_analyze_end_time='23:59 +0000'")) + tk.MustExec("set global tidb_auto_analyze_start_time='00:00 +0000'") + tk.MustExec("set global tidb_auto_analyze_end_time='23:59 +0000'") tk.MustExec("insert into t values (1)" + strings.Repeat(", (1)", 10)) c.Assert(h.DumpStatsDeltaToKV(handle.DumpAll), IsNil)