Skip to content

Commit f585f5d

Browse files
authored
statistics: avoid stats meta full load after table analysis (#57756)
close #57631
1 parent 8fe0618 commit f585f5d

File tree

12 files changed

+134
-32
lines changed

12 files changed

+134
-32
lines changed

pkg/executor/analyze.go

+9-1
Original file line numberDiff line numberDiff line change
@@ -99,6 +99,14 @@ func (e *AnalyzeExec) Next(ctx context.Context, _ *chunk.Chunk) error {
9999
if len(tasks) == 0 {
100100
return nil
101101
}
102+
tableAndPartitionIDs := make([]int64, 0, len(tasks))
103+
for _, task := range tasks {
104+
tableID := getTableIDFromTask(task)
105+
tableAndPartitionIDs = append(tableAndPartitionIDs, tableID.TableID)
106+
if tableID.IsPartitionTable() {
107+
tableAndPartitionIDs = append(tableAndPartitionIDs, tableID.PartitionID)
108+
}
109+
}
102110

103111
// Get the min number of goroutines for parallel execution.
104112
buildStatsConcurrency, err := getBuildStatsConcurrency(e.Ctx())
@@ -186,7 +194,7 @@ TASKLOOP:
186194
if err != nil {
187195
sessionVars.StmtCtx.AppendWarning(err)
188196
}
189-
return statsHandle.Update(ctx, infoSchema)
197+
return statsHandle.Update(ctx, infoSchema, tableAndPartitionIDs...)
190198
}
191199

192200
func (e *AnalyzeExec) waitFinish(ctx context.Context, g *errgroup.Group, resultsCh chan *statistics.AnalyzeResults) error {

pkg/statistics/handle/cache/bench_test.go

+9-3
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,9 @@ func benchCopyAndUpdate(b *testing.B, c types.StatsCache) {
3535
defer wg.Done()
3636
t1 := testutil.NewMockStatisticsTable(1, 1, true, false, false)
3737
t1.PhysicalID = rand.Int63()
38-
c.UpdateStatsCache([]*statistics.Table{t1}, nil)
38+
c.UpdateStatsCache(types.CacheUpdate{
39+
Updated: []*statistics.Table{t1},
40+
})
3941
}()
4042
}
4143
wg.Wait()
@@ -51,7 +53,9 @@ func benchPutGet(b *testing.B, c types.StatsCache) {
5153
defer wg.Done()
5254
t1 := testutil.NewMockStatisticsTable(1, 1, true, false, false)
5355
t1.PhysicalID = rand.Int63()
54-
c.UpdateStatsCache([]*statistics.Table{t1}, nil)
56+
c.UpdateStatsCache(types.CacheUpdate{
57+
Updated: []*statistics.Table{t1},
58+
})
5559
}(i)
5660
}
5761
for i := 0; i < b.N; i++ {
@@ -73,7 +77,9 @@ func benchGet(b *testing.B, c types.StatsCache) {
7377
defer w.Done()
7478
t1 := testutil.NewMockStatisticsTable(1, 1, true, false, false)
7579
t1.PhysicalID = rand.Int63()
76-
c.UpdateStatsCache([]*statistics.Table{t1}, nil)
80+
c.UpdateStatsCache(types.CacheUpdate{
81+
Updated: []*statistics.Table{t1},
82+
})
7783
}(i)
7884
}
7985
w.Wait()

pkg/statistics/handle/cache/statscache.go

+36-12
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,8 @@ package cache
1616

1717
import (
1818
"context"
19+
"slices"
20+
"strconv"
1921
"sync/atomic"
2022
"time"
2123

@@ -64,19 +66,35 @@ func NewStatsCacheImplForTest() (types.StatsCache, error) {
6466
}
6567

6668
// Update reads stats meta from store and updates the stats map.
67-
func (s *StatsCacheImpl) Update(ctx context.Context, is infoschema.InfoSchema) error {
69+
func (s *StatsCacheImpl) Update(ctx context.Context, is infoschema.InfoSchema, tableAndPartitionIDs ...int64) error {
6870
start := time.Now()
6971
lastVersion := s.GetNextCheckVersionWithOffset()
7072
var (
71-
rows []chunk.Row
72-
err error
73+
skipMoveForwardStatsCache bool
74+
rows []chunk.Row
75+
err error
7376
)
7477
if err := util.CallWithSCtx(s.statsHandle.SPool(), func(sctx sessionctx.Context) error {
75-
rows, _, err = util.ExecRows(
76-
sctx,
77-
"SELECT version, table_id, modify_count, count, snapshot from mysql.stats_meta where version > %? order by version",
78-
lastVersion,
79-
)
78+
query := "SELECT version, table_id, modify_count, count, snapshot from mysql.stats_meta where version > %? "
79+
args := []any{lastVersion}
80+
81+
if len(tableAndPartitionIDs) > 0 {
82+
// When updating specific tables, we skip incrementing the max stats version to avoid missing
83+
// delta updates for other tables. The max version only advances when doing a full update.
84+
skipMoveForwardStatsCache = true
85+
// Sort the table IDs first so that slices.Compact can drop the (now adjacent) duplicates
86+
slices.Sort(tableAndPartitionIDs)
87+
tableAndPartitionIDs = slices.Compact(tableAndPartitionIDs)
88+
// Convert table IDs to strings since the SQL executor only accepts string arrays for IN clauses
89+
tableStringIDs := make([]string, 0, len(tableAndPartitionIDs))
90+
for _, tableID := range tableAndPartitionIDs {
91+
tableStringIDs = append(tableStringIDs, strconv.FormatInt(tableID, 10))
92+
}
93+
query += "and table_id in (%?) "
94+
args = append(args, tableStringIDs)
95+
}
96+
query += "order by version"
97+
rows, _, err = util.ExecRows(sctx, query, args...)
8098
return err
8199
}); err != nil {
82100
return errors.Trace(err)
@@ -150,7 +168,13 @@ func (s *StatsCacheImpl) Update(ctx context.Context, is infoschema.InfoSchema) e
150168
tables = append(tables, tbl)
151169
}
152170

153-
s.UpdateStatsCache(tables, deletedTableIDs)
171+
s.UpdateStatsCache(types.CacheUpdate{
172+
Updated: tables,
173+
Deleted: deletedTableIDs,
174+
Options: types.UpdateOptions{
175+
SkipMoveForward: skipMoveForwardStatsCache,
176+
},
177+
})
154178
dur := time.Since(start)
155179
tidbmetrics.StatsDeltaLoadHistogram.Observe(dur.Seconds())
156180
return nil
@@ -191,12 +215,12 @@ func (s *StatsCacheImpl) replace(newCache *StatsCache) {
191215
}
192216

193217
// UpdateStatsCache applies the updated and deleted tables carried by cacheUpdate to the cache.
194-
func (s *StatsCacheImpl) UpdateStatsCache(tables []*statistics.Table, deletedIDs []int64) {
218+
func (s *StatsCacheImpl) UpdateStatsCache(cacheUpdate types.CacheUpdate) {
195219
if enableQuota := config.GetGlobalConfig().Performance.EnableStatsCacheMemQuota; enableQuota {
196-
s.Load().Update(tables, deletedIDs)
220+
s.Load().Update(cacheUpdate.Updated, cacheUpdate.Deleted, cacheUpdate.Options.SkipMoveForward)
197221
} else {
198222
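// TODO: remove this branch because we will always enable quota. NOTE(review): Options.SkipMoveForward is not passed to CopyAndUpdate here — confirm this is intended.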
199-
newCache := s.Load().CopyAndUpdate(tables, deletedIDs)
223+
newCache := s.Load().CopyAndUpdate(cacheUpdate.Updated, cacheUpdate.Deleted)
200224
s.replace(newCache)
201225
}
202226
}

pkg/statistics/handle/cache/statscacheinner.go

+7-5
Original file line numberDiff line numberDiff line change
@@ -163,7 +163,7 @@ func (sc *StatsCache) CopyAndUpdate(tables []*statistics.Table, deletedIDs []int
163163
}
164164

165165
// Update updates the statistics table cache; when skipMoveForwardStatsCache is true, maxTblStatsVer is left unchanged.
166-
func (sc *StatsCache) Update(tables []*statistics.Table, deletedIDs []int64) {
166+
func (sc *StatsCache) Update(tables []*statistics.Table, deletedIDs []int64, skipMoveForwardStatsCache bool) {
167167
for _, tbl := range tables {
168168
id := tbl.PhysicalID
169169
metrics.UpdateCounter.Inc()
@@ -174,10 +174,12 @@ func (sc *StatsCache) Update(tables []*statistics.Table, deletedIDs []int64) {
174174
sc.c.Del(id)
175175
}
176176

177-
// update the maxTblStatsVer
178-
for _, t := range tables {
179-
if oldVersion := sc.maxTblStatsVer.Load(); t.Version > oldVersion {
180-
sc.maxTblStatsVer.CompareAndSwap(oldVersion, t.Version)
177+
if !skipMoveForwardStatsCache {
178+
// update the maxTblStatsVer
179+
for _, t := range tables {
180+
if oldVersion := sc.maxTblStatsVer.Load(); t.Version > oldVersion {
181+
sc.maxTblStatsVer.CompareAndSwap(oldVersion, t.Version)
182+
}
181183
}
182184
}
183185
}

pkg/statistics/handle/handle.go

+3-1
Original file line numberDiff line numberDiff line change
@@ -184,7 +184,9 @@ func (h *Handle) getPartitionStats(tblInfo *model.TableInfo, pid int64, returnPs
184184
tbl = statistics.PseudoTable(tblInfo, false, true)
185185
tbl.PhysicalID = pid
186186
if tblInfo.GetPartitionInfo() == nil || h.Len() < 64 {
187-
h.UpdateStatsCache([]*statistics.Table{tbl}, nil)
187+
h.UpdateStatsCache(types.CacheUpdate{
188+
Updated: []*statistics.Table{tbl},
189+
})
188190
}
189191
return tbl
190192
}

pkg/statistics/handle/handletest/statstest/BUILD.bazel

+1-1
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ go_test(
99
],
1010
flaky = True,
1111
race = "on",
12-
shard_count = 12,
12+
shard_count = 13,
1313
deps = [
1414
"//pkg/config",
1515
"//pkg/parser/model",

pkg/statistics/handle/handletest/statstest/stats_test.go

+32
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,38 @@ import (
3131
"github.com/stretchr/testify/require"
3232
)
3333

34+
func TestStatsCacheProcess(t *testing.T) {
35+
store, dom := testkit.CreateMockStoreAndDomain(t)
36+
testKit := testkit.NewTestKit(t, store)
37+
testKit.MustExec("use test")
38+
testKit.MustExec("create table t (c1 int, c2 int)")
39+
testKit.MustExec("insert into t values(1, 2)")
40+
analyzehelper.TriggerPredicateColumnsCollection(t, testKit, store, "t", "c1", "c2")
41+
do := dom
42+
is := do.InfoSchema()
43+
tbl, err := is.TableByName(context.Background(), model.NewCIStr("test"), model.NewCIStr("t"))
44+
require.NoError(t, err)
45+
tableInfo := tbl.Meta()
46+
statsTbl := do.StatsHandle().GetTableStats(tableInfo)
47+
require.True(t, statsTbl.Pseudo)
48+
require.Zero(t, statsTbl.Version)
49+
currentVersion := do.StatsHandle().MaxTableStatsVersion()
50+
testKit.MustExec("analyze table t")
51+
statsTbl = do.StatsHandle().GetTableStats(tableInfo)
52+
require.False(t, statsTbl.Pseudo)
53+
require.NotZero(t, statsTbl.Version)
54+
require.Equal(t, currentVersion, do.StatsHandle().MaxTableStatsVersion())
55+
newVersion := do.StatsHandle().GetNextCheckVersionWithOffset()
56+
require.Equal(t, currentVersion, newVersion, "analyze should not move forward the stats cache version")
57+
58+
// Insert more rows
59+
testKit.MustExec("insert into t values(2, 3)")
60+
require.NoError(t, do.StatsHandle().DumpStatsDeltaToKV(true))
61+
require.NoError(t, do.StatsHandle().Update(context.Background(), is))
62+
newVersion = do.StatsHandle().MaxTableStatsVersion()
63+
require.NotEqual(t, currentVersion, newVersion, "update with no table should move forward the stats cache version")
64+
}
65+
3466
func TestStatsCache(t *testing.T) {
3567
store, dom := testkit.CreateMockStoreAndDomain(t)
3668
testKit := testkit.NewTestKit(t, store)

pkg/statistics/handle/storage/read.go

+9-3
Original file line numberDiff line numberDiff line change
@@ -658,7 +658,9 @@ func loadNeededColumnHistograms(sctx sessionctx.Context, statsHandle statstypes.
658658
if loadNeeded && !analyzed {
659659
fakeCol := statistics.EmptyColumn(tblInfo.ID, tblInfo.PKIsHandle, colInfo)
660660
statsTbl.SetCol(col.ID, fakeCol)
661-
statsHandle.UpdateStatsCache([]*statistics.Table{statsTbl}, nil)
661+
statsHandle.UpdateStatsCache(statstypes.CacheUpdate{
662+
Updated: []*statistics.Table{statsTbl},
663+
})
662664
}
663665
asyncload.AsyncLoadHistogramNeededItems.Delete(col)
664666
return nil
@@ -720,7 +722,9 @@ func loadNeededColumnHistograms(sctx sessionctx.Context, statsHandle statstypes.
720722
}
721723
}
722724
statsTbl.SetCol(col.ID, colHist)
723-
statsHandle.UpdateStatsCache([]*statistics.Table{statsTbl}, nil)
725+
statsHandle.UpdateStatsCache(statstypes.CacheUpdate{
726+
Updated: []*statistics.Table{statsTbl},
727+
})
724728
asyncload.AsyncLoadHistogramNeededItems.Delete(col)
725729
if col.IsSyncLoadFailed {
726730
logutil.BgLogger().Warn("Hist for column should already be loaded as sync but not found.",
@@ -782,7 +786,9 @@ func loadNeededIndexHistograms(sctx sessionctx.Context, is infoschema.InfoSchema
782786
tbl.LastAnalyzeVersion = max(tbl.LastAnalyzeVersion, idxHist.LastUpdateVersion)
783787
}
784788
tbl.SetIdx(idx.ID, idxHist)
785-
statsHandle.UpdateStatsCache([]*statistics.Table{tbl}, nil)
789+
statsHandle.UpdateStatsCache(statstypes.CacheUpdate{
790+
Updated: []*statistics.Table{tbl},
791+
})
786792
if idx.IsSyncLoadFailed {
787793
logutil.BgLogger().Warn("Hist for index should already be loaded as sync but not found.",
788794
zap.Int64("table_id", idx.TableID),

pkg/statistics/handle/storage/stats_read_writer.go

+3-1
Original file line numberDiff line numberDiff line change
@@ -277,7 +277,9 @@ func (s *statsReadWriter) ReloadExtendedStatistics() error {
277277
}
278278
tables = append(tables, t)
279279
}
280-
s.statsHandler.UpdateStatsCache(tables, nil)
280+
s.statsHandler.UpdateStatsCache(statstypes.CacheUpdate{
281+
Updated: tables,
282+
})
281283
return nil
282284
}, util.FlagWrapTxn)
283285
}

pkg/statistics/handle/storage/update.go

+3-1
Original file line numberDiff line numberDiff line change
@@ -211,7 +211,9 @@ func removeExtendedStatsItem(statsCache types.StatsCache,
211211
}
212212
newTbl := tbl.Copy()
213213
delete(newTbl.ExtendedStats.Stats, statsName)
214-
statsCache.UpdateStatsCache([]*statistics.Table{newTbl}, nil)
214+
statsCache.UpdateStatsCache(types.CacheUpdate{
215+
Updated: []*statistics.Table{newTbl},
216+
})
215217
}
216218

217219
var changeGlobalStatsTables = []string{

pkg/statistics/handle/syncload/stats_syncload.go

+3-1
Original file line numberDiff line numberDiff line change
@@ -621,6 +621,8 @@ func (s *statsSyncLoad) updateCachedItem(tblInfo table.Table, item model.TableIt
621621
tbl.StatsVer = statistics.Version0
622622
}
623623
}
624-
s.statsHandle.UpdateStatsCache([]*statistics.Table{tbl}, nil)
624+
s.statsHandle.UpdateStatsCache(statstypes.CacheUpdate{
625+
Updated: []*statistics.Table{tbl},
626+
})
625627
return true
626628
}

pkg/statistics/handle/types/interfaces.go

+19-3
Original file line numberDiff line numberDiff line change
@@ -195,6 +195,22 @@ type StatsAnalyze interface {
195195
Close()
196196
}
197197

198+
// CacheUpdate encapsulates changes to be made to the stats cache.
199+
type CacheUpdate struct {
200+
Updated []*statistics.Table
201+
Deleted []int64
202+
Options UpdateOptions
203+
}
204+
205+
// UpdateOptions contains configuration for cache updates
206+
type UpdateOptions struct {
207+
// SkipMoveForward controls whether to skip updating the cache's max version number.
208+
// When true, the cache max version number stays unchanged even after updates.
209+
// This improves performance when analyzing a small number of tables by avoiding
210+
// unnecessary full cache reloads that would normally be triggered by version changes.
211+
SkipMoveForward bool
212+
}
213+
198214
// StatsCache is used to manage all table statistics in memory.
199215
type StatsCache interface {
200216
// Close closes this cache.
@@ -204,7 +220,7 @@ type StatsCache interface {
204220
Clear()
205221

206222
// Update reads stats meta from store and updates the stats map.
207-
Update(ctx context.Context, is infoschema.InfoSchema) error
223+
Update(ctx context.Context, is infoschema.InfoSchema, tableAndPartitionIDs ...int64) error
208224

209225
// MemConsumed returns its memory usage.
210226
MemConsumed() (size int64)
@@ -215,8 +231,8 @@ type StatsCache interface {
215231
// Put puts this table stats into the cache.
216232
Put(tableID int64, t *statistics.Table)
217233

218-
// UpdateStatsCache updates the cache.
219-
UpdateStatsCache(addedTables []*statistics.Table, deletedTableIDs []int64)
234+
// UpdateStatsCache applies a batch of changes to the cache.
235+
UpdateStatsCache(update CacheUpdate)
220236

221237
// GetNextCheckVersionWithOffset returns the last version with offset.
222238
// It is used to fetch updated statistics from the stats meta table.

0 commit comments

Comments
 (0)