From c0fc3baf084ae3343328f30195c5e38f0fbe9ccf Mon Sep 17 00:00:00 2001 From: Weizhen Wang Date: Thu, 28 Mar 2024 18:38:19 +0800 Subject: [PATCH] statistics: shard needsStatsMap (#52183) ref pingcap/tidb#51853 --- pkg/statistics/column.go | 2 +- pkg/statistics/table.go | 61 ++++++++++++++++++++++++++++++++++++---- 2 files changed, 57 insertions(+), 6 deletions(-) diff --git a/pkg/statistics/column.go b/pkg/statistics/column.go index 224de21cd16a6..4ad4b444451b3 100644 --- a/pkg/statistics/column.go +++ b/pkg/statistics/column.go @@ -134,7 +134,7 @@ func (c *Column) MemoryUsage() CacheItemMemoryUsage { // HistogramNeededItems stores the columns/indices whose Histograms need to be loaded from physical kv layer. // Currently, we only load index/pk's Histogram from kv automatically. Columns' are loaded by needs. -var HistogramNeededItems = neededStatsMap{items: map[model.TableItemID]struct{}{}} +var HistogramNeededItems = newNeededStatsMap() // ColumnStatsIsInvalid checks if this column is invalid. // If this column has histogram but not loaded yet, diff --git a/pkg/statistics/table.go b/pkg/statistics/table.go index a7be276c83ca1..b3adb3597e4d4 100644 --- a/pkg/statistics/table.go +++ b/pkg/statistics/table.go @@ -653,12 +653,12 @@ func (t *Table) IndexIsLoadNeeded(id int64) (*Index, bool) { return idx, false } -type neededStatsMap struct { +type neededStatsInternalMap struct { items map[model.TableItemID]struct{} m sync.RWMutex } -func (n *neededStatsMap) AllItems() []model.TableItemID { +func (n *neededStatsInternalMap) AllItems() []model.TableItemID { n.m.RLock() keys := make([]model.TableItemID, 0, len(n.items)) for key := range n.items { @@ -668,24 +668,75 @@ func (n *neededStatsMap) AllItems() []model.TableItemID { return keys } -func (n *neededStatsMap) Insert(col model.TableItemID) { +func (n *neededStatsInternalMap) Insert(col model.TableItemID) { n.m.Lock() n.items[col] = struct{}{} n.m.Unlock() } -func (n *neededStatsMap) Delete(col model.TableItemID) { +func (n *neededStatsInternalMap) Delete(col model.TableItemID) { n.m.Lock() delete(n.items, col) n.m.Unlock() } -func (n *neededStatsMap) Length() int { +func (n *neededStatsInternalMap) Length() int { n.m.RLock() defer n.m.RUnlock() return len(n.items) } +const shardCnt = 128 + +type neededStatsMap struct { + items [shardCnt]neededStatsInternalMap +} + +func getIdx(tbl model.TableItemID) int64 { + var id int64 + if tbl.ID < 0 { + id = -tbl.ID + } else { + id = tbl.ID + } + return id % shardCnt +} + +func newNeededStatsMap() *neededStatsMap { + result := neededStatsMap{} + for i := 0; i < shardCnt; i++ { + result.items[i] = neededStatsInternalMap{ + items: make(map[model.TableItemID]struct{}), + } + } + return &result +} + +func (n *neededStatsMap) AllItems() []model.TableItemID { + var result []model.TableItemID + for i := 0; i < shardCnt; i++ { + keys := n.items[i].AllItems() + result = append(result, keys...) + } + return result +} + +func (n *neededStatsMap) Insert(col model.TableItemID) { + n.items[getIdx(col)].Insert(col) +} + +func (n *neededStatsMap) Delete(col model.TableItemID) { + n.items[getIdx(col)].Delete(col) +} + +func (n *neededStatsMap) Length() int { + var result int + for i := 0; i < shardCnt; i++ { + result += n.items[i].Length() + } + return result +} + // RatioOfPseudoEstimate means if modifyCount / statsTblCount is greater than this ratio, we think the stats is invalid // and use pseudo estimation. var RatioOfPseudoEstimate = atomic.NewFloat64(0.7)