Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

stats: auto analyze on certain period of a day #7570

Merged
merged 7 commits into from
Sep 5, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions sessionctx/variable/sysvar.go
Original file line number Diff line number Diff line change
Expand Up @@ -623,6 +623,8 @@ var defaultSysVars = []*SysVar{
{ScopeSession, TiDBOptAggPushDown, boolToIntStr(DefOptAggPushDown)},
{ScopeGlobal | ScopeSession, TiDBBuildStatsConcurrency, strconv.Itoa(DefBuildStatsConcurrency)},
{ScopeGlobal, TiDBAutoAnalyzeRatio, strconv.FormatFloat(DefAutoAnalyzeRatio, 'f', -1, 64)},
{ScopeGlobal, TiDBAutoAnalyzeStartTime, DefAutoAnalyzeStartTime},
zz-jason marked this conversation as resolved.
Show resolved Hide resolved
{ScopeGlobal, TiDBAutoAnalyzeEndTime, DefAutoAnalyzeEndTime},
{ScopeSession, TiDBChecksumTableConcurrency, strconv.Itoa(DefChecksumTableConcurrency)},
{ScopeGlobal | ScopeSession, TiDBDistSQLScanConcurrency, strconv.Itoa(DefDistSQLScanConcurrency)},
{ScopeGlobal | ScopeSession, TiDBOptInSubqUnFolding, boolToIntStr(DefOptInSubqUnfolding)},
Expand Down
6 changes: 6 additions & 0 deletions sessionctx/variable/tidb_vars.go
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,10 @@ const (
// Auto analyze will run if (table modify count)/(table row count) is greater than this value.
TiDBAutoAnalyzeRatio = "tidb_auto_analyze_ratio"

// Auto analyze will run if the current time is between the start time and the end time.
TiDBAutoAnalyzeStartTime = "tidb_auto_analyze_start_time"
TiDBAutoAnalyzeEndTime = "tidb_auto_analyze_end_time"

// tidb_checksum_table_concurrency is used to speed up the ADMIN CHECKSUM TABLE
// statement, when a table has multiple indices, those indices can be
// scanned concurrently, with the cost of higher system performance impact.
Expand Down Expand Up @@ -189,6 +193,8 @@ const (
DefDistSQLScanConcurrency = 15
DefBuildStatsConcurrency = 4
DefAutoAnalyzeRatio = 0.5
DefAutoAnalyzeStartTime = "00:00 +0000"
DefAutoAnalyzeEndTime = "23:59 +0000"
DefChecksumTableConcurrency = 4
DefSkipUTF8Check = false
DefOptAggPushDown = false
Expand Down
26 changes: 26 additions & 0 deletions sessionctx/variable/varsutil.go
Original file line number Diff line number Diff line change
Expand Up @@ -317,6 +317,12 @@ func ValidateSetSystemVar(vars *SessionVars, name string, value string) (string,
return value, ErrWrongValueForVar.GenByArgs(name)
}
return value, nil
case TiDBAutoAnalyzeStartTime, TiDBAutoAnalyzeEndTime:
v, err := setAnalyzeTime(vars, value)
if err != nil {
return "", errors.Trace(err)
}
return v, nil
}
return value, nil
}
Expand Down Expand Up @@ -395,3 +401,23 @@ func GoTimeToTS(t time.Time) uint64 {
ts := (t.UnixNano() / int64(time.Millisecond)) << epochShiftBits
return uint64(ts)
}

// analyzeLocalTimeFormat is the short layout of analyze start time and end
// time: hour and minute only, without a zone offset.
const analyzeLocalTimeFormat = "15:04"

// AnalyzeFullTimeFormat is the full format of analyze start time and end time:
// hour and minute followed by a numeric zone offset.
const AnalyzeFullTimeFormat = "15:04 -0700"

// setAnalyzeTime validates an auto analyze start/end time value and returns it
// re-rendered in AnalyzeFullTimeFormat.
//
// A value no longer than analyzeLocalTimeFormat is parsed with that short
// layout; anything longer is parsed with AnalyzeFullTimeFormat. The session
// time zone is handed to time.ParseInLocation as the default location.
// A parse failure is returned as a traced error together with an empty string.
func setAnalyzeTime(s *SessionVars, val string) (string, error) {
	// For a value without an explicit offset, time.ParseInLocation builds the
	// result with time.Date, which panics on a nil location. Guard against a
	// session whose time zone has not been set.
	// NOTE(review): time.Local is assumed to be the right fallback here — confirm
	// it matches how the rest of the package resolves an unset session zone.
	loc := s.TimeZone
	if loc == nil {
		loc = time.Local
	}

	layout := AnalyzeFullTimeFormat
	if len(val) <= len(analyzeLocalTimeFormat) {
		layout = analyzeLocalTimeFormat
	}

	t, err := time.ParseInLocation(layout, val, loc)
	if err != nil {
		return "", errors.Trace(err)
	}
	return t.Format(AnalyzeFullTimeFormat), nil
}
78 changes: 62 additions & 16 deletions statistics/update.go
Original file line number Diff line number Diff line change
Expand Up @@ -601,12 +601,28 @@ func TableAnalyzed(tbl *Table) bool {
return false
}

// needAnalyzeTable checks if we need to analyze the table:
// withinTimePeriod reports whether the time of day of `now` lies inside the
// window that opens at `start` and closes at `end`; both bounds are inclusive.
// Only the UTC hour and minute of each argument take part in the comparison:
// the date, the seconds and the sub-second part are ignored.
func withinTimePeriod(start, end, now time.Time) bool {
	// minuteOfDay maps a time to the minutes elapsed since midnight in UTC.
	minuteOfDay := func(t time.Time) int {
		u := t.UTC()
		return u.Hour()*60 + u.Minute()
	}
	opens, closes, cur := minuteOfDay(start), minuteOfDay(end), minuteOfDay(now)

	if opens <= closes {
		// The window stays within a single day, e.g. from 00:00 to 06:00.
		return opens <= cur && cur <= closes
	}
	// The window wraps around midnight, e.g. from 22:00 to 06:00.
	return cur >= opens || cur <= closes
}

// NeedAnalyzeTable checks if we need to analyze the table:
// 1. If the table has never been analyzed, we need to analyze it when it has
// not been modified for a time.
// not been modified for a while.
// 2. If the table had been analyzed before, we need to analyze it when
// "tbl.ModifyCount/tbl.Count > autoAnalyzeRatio".
func needAnalyzeTable(tbl *Table, limit time.Duration, autoAnalyzeRatio float64) bool {
// 3. The current time is between `start` and `end`.
func NeedAnalyzeTable(tbl *Table, limit time.Duration, autoAnalyzeRatio float64, start, end, now time.Time) bool {
analyzed := TableAnalyzed(tbl)
if !analyzed {
t := time.Unix(0, oracle.ExtractPhysical(tbl.Version)*int64(time.Millisecond))
Expand All @@ -616,34 +632,64 @@ func needAnalyzeTable(tbl *Table, limit time.Duration, autoAnalyzeRatio float64)
if autoAnalyzeRatio == 0 {
return false
}
return float64(tbl.ModifyCount)/float64(tbl.Count) > autoAnalyzeRatio
// No need to analyze it.
if float64(tbl.ModifyCount)/float64(tbl.Count) <= autoAnalyzeRatio {
return false
}
// Tests if current time is within the time period.
return withinTimePeriod(start, end, now)
}

const minAutoAnalyzeRatio = 0.3
const (
minAutoAnalyzeRatio = 0.3
)

func (h *Handle) getAutoAnalyzeRatio() float64 {
sql := fmt.Sprintf("select variable_value from mysql.global_variables where variable_name = '%s'", variable.TiDBAutoAnalyzeRatio)
func (h *Handle) getAutoAnalyzeParameters() map[string]string {
sql := fmt.Sprintf("select variable_name, variable_value from mysql.global_variables where variable_name in ('%s', '%s', '%s')",
variable.TiDBAutoAnalyzeRatio, variable.TiDBAutoAnalyzeStartTime, variable.TiDBAutoAnalyzeEndTime)
rows, _, err := h.restrictedExec.ExecRestrictedSQL(nil, sql)
if err != nil {
return variable.DefAutoAnalyzeRatio
return map[string]string{}
}
autoAnalyzeRatio := variable.DefAutoAnalyzeRatio
if len(rows) > 0 {
autoAnalyzeRatio, err = strconv.ParseFloat(rows[0].GetString(0), 64)
if err != nil {
return variable.DefAutoAnalyzeRatio
}
parameters := make(map[string]string)
for _, row := range rows {
parameters[row.GetString(0)] = row.GetString(1)
}
return parameters
}

// parseAutoAnalyzeRatio converts the textual auto analyze ratio into a float64.
// A value that cannot be parsed yields variable.DefAutoAnalyzeRatio. A positive
// value is raised to at least minAutoAnalyzeRatio; any other parsed value is
// returned unchanged.
func parseAutoAnalyzeRatio(ratio string) float64 {
	parsed, err := strconv.ParseFloat(ratio, 64)
	switch {
	case err != nil:
		return variable.DefAutoAnalyzeRatio
	case parsed > 0:
		// Clamp small positive ratios up to the supported minimum.
		return math.Max(parsed, minAutoAnalyzeRatio)
	default:
		return parsed
	}
}

// parseAnalyzePeriod parses the auto analyze start and end time settings, both
// written in variable.AnalyzeFullTimeFormat, and returns them in that order.
//
// An empty setting is replaced by its default (variable.DefAutoAnalyzeStartTime
// or variable.DefAutoAnalyzeEndTime) before parsing. A setting that is present
// but malformed produces a traced parse error.
func parseAnalyzePeriod(start, end string) (time.Time, time.Time, error) {
	// The caller looks these values up in a map that is empty when the query
	// for global variables fails and lacks a key when the row does not exist.
	// In both situations the lookup yields "". Treat that as "not configured"
	// rather than as a parse error, the same way the ratio falls back to its
	// default.
	if start == "" {
		start = variable.DefAutoAnalyzeStartTime
	}
	if end == "" {
		end = variable.DefAutoAnalyzeEndTime
	}

	s, err := time.ParseInLocation(variable.AnalyzeFullTimeFormat, start, time.UTC)
	if err != nil {
		return s, s, errors.Trace(err)
	}
	e, err := time.ParseInLocation(variable.AnalyzeFullTimeFormat, end, time.UTC)
	if err != nil {
		return s, e, errors.Trace(err)
	}
	return s, e, nil
}

// HandleAutoAnalyze analyzes the newly created table or index.
func (h *Handle) HandleAutoAnalyze(is infoschema.InfoSchema) error {
dbs := is.AllSchemaNames()
autoAnalyzeRatio := h.getAutoAnalyzeRatio()
parameters := h.getAutoAnalyzeParameters()
autoAnalyzeRatio := parseAutoAnalyzeRatio(parameters[variable.TiDBAutoAnalyzeRatio])
start, end, err := parseAnalyzePeriod(parameters[variable.TiDBAutoAnalyzeStartTime], parameters[variable.TiDBAutoAnalyzeEndTime])
if err != nil {
return errors.Trace(err)
}
for _, db := range dbs {
tbls := is.SchemaTables(model.NewCIStr(db))
for _, tbl := range tbls {
Expand All @@ -653,7 +699,7 @@ func (h *Handle) HandleAutoAnalyze(is infoschema.InfoSchema) error {
continue
}
tblName := "`" + db + "`.`" + tblInfo.Name.O + "`"
if needAnalyzeTable(statsTbl, 20*h.Lease, autoAnalyzeRatio) {
if NeedAnalyzeTable(statsTbl, 20*h.Lease, autoAnalyzeRatio, start, end, time.Now()) {
sql := fmt.Sprintf("analyze table %s", tblName)
log.Infof("[stats] auto analyze table %s now", tblName)
return errors.Trace(h.execAutoAnalyze(sql))
Expand Down
106 changes: 106 additions & 0 deletions statistics/update_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,9 @@ import (
"github.com/pingcap/tidb/model"
"github.com/pingcap/tidb/mysql"
"github.com/pingcap/tidb/sessionctx/stmtctx"
"github.com/pingcap/tidb/sessionctx/variable"
"github.com/pingcap/tidb/statistics"
"github.com/pingcap/tidb/store/tikv/oracle"
"github.com/pingcap/tidb/types"
"github.com/pingcap/tidb/util/codec"
"github.com/pingcap/tidb/util/ranger"
Expand Down Expand Up @@ -925,3 +927,107 @@ func (s *testStatsUpdateSuite) TestLogDetailedInfo(c *C) {
c.Assert(s.hook.results, Equals, t.result)
}
}

// TestNeedAnalyzeTable feeds statistics.NeedAnalyzeTable a table of scenarios
// (never-analyzed tables, disabled auto analyze, small modify counts, and
// times inside or outside the configured window) and checks each verdict.
func (s *testStatsUpdateSuite) TestNeedAnalyzeTable(c *C) {
	// mustParse turns an "HH:MM +ZZZZ" literal into a time.Time and asserts
	// that the literal is well formed.
	mustParse := func(raw string) time.Time {
		parsed, err := time.ParseInLocation(variable.AnalyzeFullTimeFormat, raw, time.UTC)
		c.Assert(err, IsNil)
		return parsed
	}

	// A table with at least one column histogram entry.
	columns := map[int64]*statistics.Column{
		1: &statistics.Column{Count: 1},
	}

	cases := []struct {
		tbl    *statistics.Table
		ratio  float64
		limit  time.Duration
		start  string
		end    string
		now    string
		result bool
	}{
		// The table was never analyzed and has reached the limit.
		{
			tbl:    &statistics.Table{Version: oracle.EncodeTSO(oracle.GetPhysical(time.Now()))},
			limit:  0,
			ratio:  0,
			start:  "00:00 +0800",
			end:    "00:01 +0800",
			now:    "00:00 +0800",
			result: true,
		},
		// The table was never analyzed but has not reached the limit.
		{
			tbl:    &statistics.Table{Version: oracle.EncodeTSO(oracle.GetPhysical(time.Now()))},
			limit:  time.Hour,
			ratio:  0,
			start:  "00:00 +0800",
			end:    "00:01 +0800",
			now:    "00:00 +0800",
			result: false,
		},
		// The table was already analyzed but auto analyze is disabled.
		{
			tbl:    &statistics.Table{HistColl: statistics.HistColl{Columns: columns, ModifyCount: 1, Count: 1}},
			limit:  0,
			ratio:  0,
			start:  "00:00 +0800",
			end:    "00:01 +0800",
			now:    "00:00 +0800",
			result: false,
		},
		// The table was already analyzed but the modify count is small.
		{
			tbl:    &statistics.Table{HistColl: statistics.HistColl{Columns: columns, ModifyCount: 0, Count: 1}},
			limit:  0,
			ratio:  0.3,
			start:  "00:00 +0800",
			end:    "00:01 +0800",
			now:    "00:00 +0800",
			result: false,
		},
		// The table was already analyzed but now is outside a same-day period.
		{
			tbl:    &statistics.Table{HistColl: statistics.HistColl{Columns: columns, ModifyCount: 1, Count: 1}},
			limit:  0,
			ratio:  0.3,
			start:  "00:00 +0800",
			end:    "00:01 +0800",
			now:    "00:02 +0800",
			result: false,
		},
		// The table was already analyzed but now is outside the 22:00-06:00 period.
		{
			tbl:    &statistics.Table{HistColl: statistics.HistColl{Columns: columns, ModifyCount: 1, Count: 1}},
			limit:  0,
			ratio:  0.3,
			start:  "22:00 +0800",
			end:    "06:00 +0800",
			now:    "10:00 +0800",
			result: false,
		},
		// The table was already analyzed and now is within a same-day period.
		{
			tbl:    &statistics.Table{HistColl: statistics.HistColl{Columns: columns, ModifyCount: 1, Count: 1}},
			limit:  0,
			ratio:  0.3,
			start:  "00:00 +0800",
			end:    "00:01 +0800",
			now:    "00:00 +0800",
			result: true,
		},
		// The table was already analyzed and now is within the 22:00-06:00 period.
		{
			tbl:    &statistics.Table{HistColl: statistics.HistColl{Columns: columns, ModifyCount: 1, Count: 1}},
			limit:  0,
			ratio:  0.3,
			start:  "22:00 +0800",
			end:    "06:00 +0800",
			now:    "23:00 +0800",
			result: true,
		},
	}

	for _, tc := range cases {
		start := mustParse(tc.start)
		end := mustParse(tc.end)
		now := mustParse(tc.now)
		c.Assert(statistics.NeedAnalyzeTable(tc.tbl, tc.limit, tc.ratio, start, end, now), Equals, tc.result)
	}
}