Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

statistics/handle: refine the condition of dumping stats delta #41133

Merged
merged 11 commits into from
Feb 7, 2023
2 changes: 1 addition & 1 deletion .bazelrc
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
startup --host_jvm_args=-Xmx4g
startup --host_jvm_args=-Xmx8g
startup --unlimit_coredumps

run:ci --color=yes
Expand Down
37 changes: 28 additions & 9 deletions statistics/handle/update.go
Original file line number Diff line number Diff line change
Expand Up @@ -408,22 +408,35 @@ var (
dumpStatsMaxDuration = time.Hour
)

// needDumpStatsDelta returns true when only updates a small portion of the table and the time since last update
// do not exceed one hour.
func needDumpStatsDelta(h *Handle, id int64, item variable.TableDelta, currentTime time.Time) bool {
if item.InitTime.IsZero() {
item.InitTime = currentTime
// needDumpStatsDelta checks whether to dump stats delta.
// 1. If the table doesn't exist or is a mem table or system table, then return false.
// 2. If the mode is DumpAll, then return true.
// 3. If the stats delta hasn't been dumped in the past hour, then return true.
// 4. If the table stats are pseudo or empty, or `Modify Count / Table Count` exceeds the threshold, then return true.
func (h *Handle) needDumpStatsDelta(is infoschema.InfoSchema, mode dumpMode, id int64, item variable.TableDelta, currentTime time.Time) bool {
tbl, ok := h.getTableByPhysicalID(is, id)
if !ok {
return false
}
tbl, ok := h.statsCache.Load().(statsCache).Get(id)
dbInfo, ok := is.SchemaByTable(tbl.Meta())
if !ok {
// No need to dump if the stats is invalid.
return false
}
if util.IsMemOrSysDB(dbInfo.Name.L) {
return false
}
if mode == DumpAll {
return true
}
if item.InitTime.IsZero() {
item.InitTime = currentTime
}
if currentTime.Sub(item.InitTime) > dumpStatsMaxDuration {
// Dump the stats to kv at least once an hour.
return true
}
if tbl.Count == 0 || float64(item.Count)/float64(tbl.Count) > DumpStatsDeltaRatio {
statsTbl := h.GetPartitionStats(tbl.Meta(), id)
if statsTbl.Pseudo || statsTbl.Count == 0 || float64(item.Count)/float64(statsTbl.Count) > DumpStatsDeltaRatio {
// Dump the stats when there are many modifications.
return true
}
Expand Down Expand Up @@ -492,9 +505,15 @@ func (h *Handle) DumpStatsDeltaToKV(mode dumpMode) error {
h.globalMap.data = deltaMap
h.globalMap.Unlock()
}()
// TODO: pass in do.InfoSchema() to DumpStatsDeltaToKV.
is := func() infoschema.InfoSchema {
h.mu.Lock()
defer h.mu.Unlock()
return h.mu.ctx.GetDomainInfoSchema().(infoschema.InfoSchema)
}()
currentTime := time.Now()
for id, item := range deltaMap {
if mode == DumpDelta && !needDumpStatsDelta(h, id, item, currentTime) {
if !h.needDumpStatsDelta(is, mode, id, item, currentTime) {
continue
}
updated, err := h.dumpTableStatCountToKV(id, item)
Expand Down
30 changes: 26 additions & 4 deletions statistics/handle/update_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -2667,20 +2667,42 @@ func TestFillMissingStatsMeta(t *testing.T) {
}

tk.MustExec("insert into t1 values (1, 2), (3, 4)")
require.NoError(t, h.DumpStatsDeltaToKV(handle.DumpAll))
require.NoError(t, h.DumpStatsDeltaToKV(handle.DumpDelta))
require.NoError(t, h.Update(is))
ver1 := checkStatsMeta(tbl1ID, "2", "2")
tk.MustExec("delete from t1 where a = 1")
require.NoError(t, h.DumpStatsDeltaToKV(handle.DumpAll))
require.NoError(t, h.DumpStatsDeltaToKV(handle.DumpDelta))
require.NoError(t, h.Update(is))
ver2 := checkStatsMeta(tbl1ID, "3", "1")
require.Greater(t, ver2, ver1)

tk.MustExec("insert into t2 values (1, 2), (3, 4)")
require.NoError(t, h.DumpStatsDeltaToKV(handle.DumpAll))
require.NoError(t, h.DumpStatsDeltaToKV(handle.DumpDelta))
require.NoError(t, h.Update(is))
checkStatsMeta(p0ID, "2", "2")
globalVer1 := checkStatsMeta(tbl2ID, "2", "2")
tk.MustExec("insert into t2 values (11, 12)")
require.NoError(t, h.DumpStatsDeltaToKV(handle.DumpAll))
require.NoError(t, h.DumpStatsDeltaToKV(handle.DumpDelta))
require.NoError(t, h.Update(is))
checkStatsMeta(p1ID, "1", "1")
globalVer2 := checkStatsMeta(tbl2ID, "3", "3")
require.Greater(t, globalVer2, globalVer1)
}

// TestNotDumpSysTable checks that DumpStatsDeltaToKV(handle.DumpAll) leaves no
// row in mysql.stats_meta for the mysql.stats_meta table itself, even after a
// statement has modified that system table.
func TestNotDumpSysTable(t *testing.T) {
	store, dom := testkit.CreateMockStoreAndDomain(t)
	kit := testkit.NewTestKit(t, store)
	kit.MustExec("use test")
	kit.MustExec("create table t1 (a int, b int)")

	// Consume the pending DDL event and hand it to the stats handle.
	statsHandle := dom.StatsHandle()
	ddlEvent := <-statsHandle.DDLEventCh()
	require.NoError(t, statsHandle.HandleDDLEvent(ddlEvent))
	// Exactly one stats_meta row is expected at this point
	// (presumably the one for t1 — confirm against HandleDDLEvent).
	kit.MustQuery("select count(1) from mysql.stats_meta").Check(testkit.Rows("1"))

	// Deleting from mysql.stats_meta creates a delta for mysql.stats_meta,
	// but that delta is expected not to be dumped, even in DumpAll mode.
	kit.MustExec("delete from mysql.stats_meta")
	require.NoError(t, statsHandle.DumpStatsDeltaToKV(handle.DumpAll))

	// Look up the physical ID of mysql.stats_meta and verify no row exists for it.
	sysTbl, err := dom.InfoSchema().TableByName(model.NewCIStr("mysql"), model.NewCIStr("stats_meta"))
	require.NoError(t, err)
	query := fmt.Sprintf("select * from mysql.stats_meta where table_id = %v", sysTbl.Meta().ID)
	kit.MustQuery(query).Check(testkit.Rows())
}