Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

stats: do not wait for data unchanged when auto analyze (#7022) #7093

Merged
merged 3 commits into from
Jul 19, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
43 changes: 26 additions & 17 deletions statistics/update.go
Original file line number Diff line number Diff line change
Expand Up @@ -368,28 +368,37 @@ const (
// AutoAnalyzeMinCnt is the auto analyze threshold: if the count of a table is less than this value, the table is not auto analyzed.
var AutoAnalyzeMinCnt int64 = 1000

func needAnalyzeTable(tbl *Table, limit time.Duration, autoAnalyzeRatio float64) bool {
if tbl.ModifyCount == 0 || tbl.Count < AutoAnalyzeMinCnt {
return false
}
t := time.Unix(0, oracle.ExtractPhysical(tbl.Version)*int64(time.Millisecond))
if time.Since(t) < limit {
return false
}
if autoAnalyzeRatio > 0 && float64(tbl.ModifyCount)/float64(tbl.Count) > autoAnalyzeRatio {
return true
}
// tableAnalyzed checks if the table is analyzed.
func tableAnalyzed(tbl *Table) bool {
for _, col := range tbl.Columns {
if col.Count > 0 {
return false
if col.Histogram.Len() > 0 {
return true
}
}
for _, idx := range tbl.Indices {
if idx.Len() > 0 {
return false
if idx.Histogram.Len() > 0 {
return true
}
}
return true
return false
}

// needAnalyzeTable reports whether tbl should be auto analyzed:
//  1. A table that has never been analyzed (per tableAnalyzed) is due once the
//     physical time carried by tbl.Version is at least `limit` in the past,
//     i.e. the table has been left unchanged for a while.
//  2. A table that has been analyzed before is due when
//     "tbl.ModifyCount/tbl.Count > autoAnalyzeRatio"; an autoAnalyzeRatio of 0
//     disables auto analyze for such tables.
func needAnalyzeTable(tbl *Table, limit time.Duration, autoAnalyzeRatio float64) bool {
	if tableAnalyzed(tbl) {
		// A zero ratio means auto analyze is disabled.
		if autoAnalyzeRatio == 0 {
			return false
		}
		modifyRatio := float64(tbl.ModifyCount) / float64(tbl.Count)
		return modifyRatio > autoAnalyzeRatio
	}
	// Never analyzed: decide purely on how old the stats version is.
	physicalMs := oracle.ExtractPhysical(tbl.Version)
	lastUpdate := time.Unix(0, physicalMs*int64(time.Millisecond))
	return time.Since(lastUpdate) >= limit
}

const minAutoAnalyzeRatio = 0.3
Expand Down Expand Up @@ -422,7 +431,7 @@ func (h *Handle) HandleAutoAnalyze(is infoschema.InfoSchema) error {
for _, tbl := range tbls {
tblInfo := tbl.Meta()
statsTbl := h.GetTableStats(tblInfo)
if statsTbl.Pseudo || statsTbl.Count == 0 {
if statsTbl.Pseudo || statsTbl.Count < AutoAnalyzeMinCnt {
continue
}
tblName := "`" + db + "`.`" + tblInfo.Name.O + "`"
Expand Down
29 changes: 25 additions & 4 deletions statistics/update_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,11 +18,13 @@ import (
"strings"
"time"

"github.com/juju/errors"
. "github.com/pingcap/check"
"github.com/pingcap/tidb/domain"
"github.com/pingcap/tidb/kv"
"github.com/pingcap/tidb/model"
"github.com/pingcap/tidb/mysql"
"github.com/pingcap/tidb/sessionctx"
"github.com/pingcap/tidb/statistics"
"github.com/pingcap/tidb/types"
"github.com/pingcap/tidb/util/ranger"
Expand Down Expand Up @@ -270,6 +272,20 @@ func (s *testStatsUpdateSuite) TestTxnWithFailure(c *C) {
c.Assert(stats1.Count, Equals, int64(rowCount1+1))
}

// dumpAnalyzeResult drains the analyze results currently queued on the
// handle's result channel and saves every histogram (with its matching
// entry in Cms) to storage. We need this because sometimes we temporarily
// make the stats lease greater than 0, in which case the analyze executor
// only sends its result to the channel.
//
// It stops at the first histogram that fails to save and returns that error
// (traced); it returns nil once the channel has been emptied.
func dumpAnalyzeResult(sctx sessionctx.Context, h *statistics.Handle) error {
	for len(h.AnalyzeResultCh()) > 0 {
		t := <-h.AnalyzeResultCh()
		for i, hg := range t.Hist {
			// Only bail out on failure: returning unconditionally here would
			// save just the first histogram and leave the rest unsaved.
			if err := statistics.SaveStatsToStorage(sctx, t.TableID, t.Count, t.IsIndex, hg, t.Cms[i]); err != nil {
				return errors.Trace(err)
			}
		}
	}
	return nil
}

func (s *testStatsUpdateSuite) TestAutoUpdate(c *C) {
defer cleanEnv(c, s.store, s.do)
testKit := testkit.NewTestKit(c, s.store)
Expand Down Expand Up @@ -311,12 +327,16 @@ func (s *testStatsUpdateSuite) TestAutoUpdate(c *C) {
break
}

// Test that even if the table is recently modified, we can still analyze the table.
h.Lease = time.Millisecond
defer func() { h.Lease = 0 }()
_, err = testKit.Exec("insert into t values ('fff')")
c.Assert(err, IsNil)
c.Assert(h.DumpStatsDeltaToKV(), IsNil)
c.Assert(h.Update(is), IsNil)
err = h.HandleAutoAnalyze(is)
c.Assert(err, IsNil)
c.Assert(dumpAnalyzeResult(testKit.Se, h), IsNil)
h.Update(is)
stats = h.GetTableStats(tableInfo)
c.Assert(stats.Count, Equals, int64(2))
Expand All @@ -328,6 +348,7 @@ func (s *testStatsUpdateSuite) TestAutoUpdate(c *C) {
c.Assert(h.Update(is), IsNil)
err = h.HandleAutoAnalyze(is)
c.Assert(err, IsNil)
c.Assert(dumpAnalyzeResult(testKit.Se, h), IsNil)
h.Update(is)
stats = h.GetTableStats(tableInfo)
c.Assert(stats.Count, Equals, int64(3))
Expand All @@ -336,13 +357,10 @@ func (s *testStatsUpdateSuite) TestAutoUpdate(c *C) {
_, err = testKit.Exec("insert into t values ('eee')")
c.Assert(err, IsNil)
h.DumpStatsDeltaToKV()
h.Clear()
// We set `Lease` here so that `Update` will use load by need strategy.
h.Lease = time.Second
h.Update(is)
h.Lease = 0
err = h.HandleAutoAnalyze(is)
c.Assert(err, IsNil)
c.Assert(dumpAnalyzeResult(testKit.Se, h), IsNil)
h.Update(is)
stats = h.GetTableStats(tableInfo)
c.Assert(stats.Count, Equals, int64(4))
Expand All @@ -354,13 +372,16 @@ func (s *testStatsUpdateSuite) TestAutoUpdate(c *C) {
break
}

testKit.MustExec("analyze table t")
c.Assert(dumpAnalyzeResult(testKit.Se, h), IsNil)
_, err = testKit.Exec("create index idx on t(a)")
c.Assert(err, IsNil)
is = do.InfoSchema()
tbl, err = is.TableByName(model.NewCIStr("test"), model.NewCIStr("t"))
c.Assert(err, IsNil)
tableInfo = tbl.Meta()
h.HandleAutoAnalyze(is)
c.Assert(dumpAnalyzeResult(testKit.Se, h), IsNil)
h.Update(is)
stats = h.GetTableStats(tableInfo)
c.Assert(stats.Count, Equals, int64(4))
Expand Down