Skip to content

Commit

Permalink
stats: do not wait for data unchanged when auto analyze (pingcap#7022)
Browse files Browse the repository at this point in the history
  • Loading branch information
alivxxx authored and Haibin Xie committed Jul 18, 2018
1 parent 8438bad commit 99e13fe
Show file tree
Hide file tree
Showing 2 changed files with 51 additions and 21 deletions.
43 changes: 26 additions & 17 deletions statistics/update.go
Original file line number Diff line number Diff line change
Expand Up @@ -368,28 +368,37 @@ const (
// AutoAnalyzeMinCnt means if the count of table is less than this value, we needn't do auto analyze.
var AutoAnalyzeMinCnt int64 = 1000

func needAnalyzeTable(tbl *Table, limit time.Duration, autoAnalyzeRatio float64) bool {
if tbl.ModifyCount == 0 || tbl.Count < AutoAnalyzeMinCnt {
return false
}
t := time.Unix(0, oracle.ExtractPhysical(tbl.Version)*int64(time.Millisecond))
if time.Since(t) < limit {
return false
}
if autoAnalyzeRatio > 0 && float64(tbl.ModifyCount)/float64(tbl.Count) > autoAnalyzeRatio {
return true
}
// tableAnalyzed checks if the table is analyzed.
func tableAnalyzed(tbl *Table) bool {
for _, col := range tbl.Columns {
if col.Count > 0 {
return false
if col.Histogram.Len() > 0 {
return true
}
}
for _, idx := range tbl.Indices {
if idx.Len() > 0 {
return false
if idx.Histogram.Len() > 0 {
return true
}
}
return true
return false
}

// needAnalyzeTable reports whether auto analyze should be run on tbl:
//  1. If tableAnalyzed reports the table as not yet analyzed, it needs
//     analyzing once at least limit has elapsed since the physical time
//     extracted from tbl.Version.
//  2. Otherwise it needs analyzing when
//     "tbl.ModifyCount/tbl.Count > autoAnalyzeRatio"; an autoAnalyzeRatio
//     of 0 disables this check.
func needAnalyzeTable(tbl *Table, limit time.Duration, autoAnalyzeRatio float64) bool {
	if tableAnalyzed(tbl) {
		// A zero ratio means auto analyze is disabled for analyzed tables.
		if autoAnalyzeRatio == 0 {
			return false
		}
		modifiedRatio := float64(tbl.ModifyCount) / float64(tbl.Count)
		return modifiedRatio > autoAnalyzeRatio
	}
	// Never analyzed: decide only by how long ago the stats version was produced.
	physicalMs := oracle.ExtractPhysical(tbl.Version)
	lastVersionTime := time.Unix(0, physicalMs*int64(time.Millisecond))
	return time.Since(lastVersionTime) >= limit
}

const minAutoAnalyzeRatio = 0.3
Expand Down Expand Up @@ -422,7 +431,7 @@ func (h *Handle) HandleAutoAnalyze(is infoschema.InfoSchema) error {
for _, tbl := range tbls {
tblInfo := tbl.Meta()
statsTbl := h.GetTableStats(tblInfo)
if statsTbl.Pseudo || statsTbl.Count == 0 {
if statsTbl.Pseudo || statsTbl.Count < AutoAnalyzeMinCnt {
continue
}
tblName := "`" + db + "`.`" + tblInfo.Name.O + "`"
Expand Down
29 changes: 25 additions & 4 deletions statistics/update_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,11 +18,13 @@ import (
"strings"
"time"

"github.com/juju/errors"
. "github.com/pingcap/check"
"github.com/pingcap/tidb/domain"
"github.com/pingcap/tidb/kv"
"github.com/pingcap/tidb/model"
"github.com/pingcap/tidb/mysql"
"github.com/pingcap/tidb/sessionctx"
"github.com/pingcap/tidb/statistics"
"github.com/pingcap/tidb/types"
"github.com/pingcap/tidb/util/ranger"
Expand Down Expand Up @@ -270,6 +272,20 @@ func (s *testStatsUpdateSuite) TestTxnWithFailure(c *C) {
c.Assert(stats1.Count, Equals, int64(rowCount1+1))
}

// dumpAnalyzeResult drains every analyze result currently buffered in the
// handle's analyze result channel and saves each of its histograms (with the
// matching CM sketch) to storage. Tests need this because they sometimes
// temporarily make the stats lease greater than 0, in which case the analyze
// executor only sends the result to a channel instead of saving it.
//
// It returns the first error reported by SaveStatsToStorage, or nil once all
// buffered results have been saved.
func dumpAnalyzeResult(sctx sessionctx.Context, h *statistics.Handle) error {
	for len(h.AnalyzeResultCh()) > 0 {
		t := <-h.AnalyzeResultCh()
		for i, hg := range t.Hist {
			// Only bail out on failure; returning unconditionally here would
			// stop after the first histogram and drop the remaining results.
			if err := statistics.SaveStatsToStorage(sctx, t.TableID, t.Count, t.IsIndex, hg, t.Cms[i]); err != nil {
				return errors.Trace(err)
			}
		}
	}
	return nil
}

func (s *testStatsUpdateSuite) TestAutoUpdate(c *C) {
defer cleanEnv(c, s.store, s.do)
testKit := testkit.NewTestKit(c, s.store)
Expand Down Expand Up @@ -311,12 +327,16 @@ func (s *testStatsUpdateSuite) TestAutoUpdate(c *C) {
break
}

// Test that even if the table is recently modified, we can still analyze the table.
h.Lease = time.Millisecond
defer func() { h.Lease = 0 }()
_, err = testKit.Exec("insert into t values ('fff')")
c.Assert(err, IsNil)
c.Assert(h.DumpStatsDeltaToKV(), IsNil)
c.Assert(h.Update(is), IsNil)
err = h.HandleAutoAnalyze(is)
c.Assert(err, IsNil)
c.Assert(dumpAnalyzeResult(testKit.Se, h), IsNil)
h.Update(is)
stats = h.GetTableStats(tableInfo)
c.Assert(stats.Count, Equals, int64(2))
Expand All @@ -328,6 +348,7 @@ func (s *testStatsUpdateSuite) TestAutoUpdate(c *C) {
c.Assert(h.Update(is), IsNil)
err = h.HandleAutoAnalyze(is)
c.Assert(err, IsNil)
c.Assert(dumpAnalyzeResult(testKit.Se, h), IsNil)
h.Update(is)
stats = h.GetTableStats(tableInfo)
c.Assert(stats.Count, Equals, int64(3))
Expand All @@ -336,13 +357,10 @@ func (s *testStatsUpdateSuite) TestAutoUpdate(c *C) {
_, err = testKit.Exec("insert into t values ('eee')")
c.Assert(err, IsNil)
h.DumpStatsDeltaToKV()
h.Clear()
// We set `Lease` here so that `Update` will use load by need strategy.
h.Lease = time.Second
h.Update(is)
h.Lease = 0
err = h.HandleAutoAnalyze(is)
c.Assert(err, IsNil)
c.Assert(dumpAnalyzeResult(testKit.Se, h), IsNil)
h.Update(is)
stats = h.GetTableStats(tableInfo)
c.Assert(stats.Count, Equals, int64(4))
Expand All @@ -354,13 +372,16 @@ func (s *testStatsUpdateSuite) TestAutoUpdate(c *C) {
break
}

testKit.MustExec("analyze table t")
c.Assert(dumpAnalyzeResult(testKit.Se, h), IsNil)
_, err = testKit.Exec("create index idx on t(a)")
c.Assert(err, IsNil)
is = do.InfoSchema()
tbl, err = is.TableByName(model.NewCIStr("test"), model.NewCIStr("t"))
c.Assert(err, IsNil)
tableInfo = tbl.Meta()
h.HandleAutoAnalyze(is)
c.Assert(dumpAnalyzeResult(testKit.Se, h), IsNil)
h.Update(is)
stats = h.GetTableStats(tableInfo)
c.Assert(stats.Count, Equals, int64(4))
Expand Down

0 comments on commit 99e13fe

Please sign in to comment.