Skip to content

Commit

Permalink
statistics: do not directly update global stats when dropping a parti…
Browse files Browse the repository at this point in the history
…tion (#48846)

ref #48182
  • Loading branch information
Rustin170506 committed Nov 24, 2023
1 parent 29e36ea commit ebca7ba
Show file tree
Hide file tree
Showing 5 changed files with 5 additions and 138 deletions.
12 changes: 2 additions & 10 deletions pkg/statistics/handle/ddl/ddl.go
Original file line number Diff line number Diff line change
Expand Up @@ -116,16 +116,8 @@ func (h *ddlHandlerImpl) HandleDDLEvent(t *util.DDLEvent) error {
}
}
case model.ActionDropTablePartition:
pruneMode, err := util.GetCurrentPruneMode(h.statsHandler.SPool())
if err != nil {
return err
}
globalTableInfo, droppedPartitionInfo := t.GetDropPartitionInfo()
if variable.PartitionPruneMode(pruneMode) == variable.Dynamic && droppedPartitionInfo != nil {
if err := h.globalStatsHandler.UpdateGlobalStats(globalTableInfo); err != nil {
return err
}
}
// TODO: Update the modify count and count for the global table.
_, droppedPartitionInfo := t.GetDropPartitionInfo()
for _, def := range droppedPartitionInfo.Definitions {
if err := h.statsWriter.ResetTableStats2KVForDrop(def.ID); err != nil {
return err
Expand Down
1 change: 0 additions & 1 deletion pkg/statistics/handle/globalstats/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@ go_library(
"//pkg/parser/model",
"//pkg/sessionctx",
"//pkg/sessionctx/stmtctx",
"//pkg/sessiontxn",
"//pkg/statistics",
"//pkg/statistics/handle/logutil",
"//pkg/statistics/handle/storage",
Expand Down
111 changes: 0 additions & 111 deletions pkg/statistics/handle/globalstats/global_stats.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,10 +22,8 @@ import (
"github.com/pingcap/tidb/pkg/parser/ast"
"github.com/pingcap/tidb/pkg/parser/model"
"github.com/pingcap/tidb/pkg/sessionctx"
"github.com/pingcap/tidb/pkg/sessiontxn"
"github.com/pingcap/tidb/pkg/statistics"
statstypes "github.com/pingcap/tidb/pkg/statistics/handle/types"
"github.com/pingcap/tidb/pkg/statistics/handle/util"
"github.com/pingcap/tidb/pkg/types"
"github.com/pingcap/tidb/pkg/util/logutil"
"github.com/tiancaiamao/gp"
Expand Down Expand Up @@ -57,14 +55,6 @@ func (sg *statsGlobalImpl) MergePartitionStats2GlobalStatsByTableID(sc sessionct
return MergePartitionStats2GlobalStatsByTableID(sc, sg.statsHandler, opts, is, physicalID, isIndex, histIDs)
}

// UpdateGlobalStats re-merges the partition-level stats of tblInfo into its
// global-level stats. It is meant to be called when a table partition is
// dropped while dynamic prune mode is in effect.
//
// The work is delegated to the package-level UpdateGlobalStats, which is run
// through util.CallWithSCtx using the stats handler's session pool. Whatever
// error that call produces is returned unchanged.
func (sg *statsGlobalImpl) UpdateGlobalStats(tblInfo *model.TableInfo) error {
	refresh := func(sctx sessionctx.Context) error {
		return UpdateGlobalStats(sctx, sg.statsHandler, tblInfo)
	}
	return util.CallWithSCtx(sg.statsHandler.SPool(), refresh)
}

// GlobalStats is used to store the statistics contained in the global-level stats
// which is generated by the merge of partition-level stats.
// It will both store the column stats and index stats.
Expand Down Expand Up @@ -170,107 +160,6 @@ var analyzeOptionDefault = map[ast.AnalyzeOptionType]uint64{
ast.AnalyzeOptNumTopN: 20,
}

// UpdateGlobalStats updates the global-level stats of tblInfo based on its
// partition-level stats.
//
// The function first loads the currently stored global stats. If there are
// none, it returns nil without generating anything. Otherwise it:
//
//  1. Copies analyzeOptionDefault into a fresh option map and overrides the
//     TopN / bucket numbers with the largest values found among the existing
//     global column stats (only when those values are non-zero).
//  2. Merges the column stats with MergePartitionStats2GlobalStats and saves
//     every merged histogram that is non-nil.
//  3. Repeats the same procedure for each index in tblInfo.Indices, one index
//     at a time. The TopN / bucket maxima are running maxima across the
//     indexes visited so far, and the option map is shared with step 1, so a
//     value set for columns stays in effect until an index overrides it.
//
// Partitions whose stats are missing are reported with a warning log and do
// not abort the update. The first error returned by loading, merging or
// saving is returned to the caller.
func UpdateGlobalStats(
	sctx sessionctx.Context,
	statsHandle statstypes.StatsHandle,
	tblInfo *model.TableInfo) error {
	tableID := tblInfo.ID
	is := sessiontxn.GetTxnManager(sctx).GetTxnInfoSchema()
	globalStats, err := statsHandle.TableStatsFromStorage(tblInfo, tableID, true, 0)
	if err != nil {
		return err
	}
	// If we do not currently have global-stats, no new global-stats will be generated.
	if globalStats == nil {
		return nil
	}
	opts := make(map[ast.AnalyzeOptionType]uint64, len(analyzeOptionDefault))
	for key, val := range analyzeOptionDefault {
		opts[key] = val
	}
	// Use current global-stats related information to construct the opts for `MergePartitionStats2GlobalStats` function.
	globalColStatsTopNNum, globalColStatsBucketNum := 0, 0
	for _, globalColStats := range globalStats.Columns {
		// Check for nil before touching any field: dereferencing a nil entry
		// would panic.
		if globalColStats == nil {
			continue
		}
		if globalColStats.TopN != nil && len(globalColStats.TopN.TopN) > globalColStatsTopNNum {
			globalColStatsTopNNum = len(globalColStats.TopN.TopN)
		}
		if len(globalColStats.Buckets) > globalColStatsBucketNum {
			globalColStatsBucketNum = len(globalColStats.Buckets)
		}
	}
	if globalColStatsTopNNum != 0 {
		opts[ast.AnalyzeOptNumTopN] = uint64(globalColStatsTopNNum)
	}
	if globalColStatsBucketNum != 0 {
		opts[ast.AnalyzeOptNumBuckets] = uint64(globalColStatsBucketNum)
	}
	// Generate the new column global-stats
	newColGlobalStats, err := MergePartitionStats2GlobalStats(sctx, statsHandle, opts, is, tblInfo, false, nil)
	if err != nil {
		return err
	}
	if len(newColGlobalStats.MissingPartitionStats) > 0 {
		logutil.BgLogger().Warn("missing partition stats when merging global stats", zap.String("table", tblInfo.Name.L),
			zap.String("item", "columns"), zap.Strings("missing", newColGlobalStats.MissingPartitionStats))
	}
	for i := 0; i < newColGlobalStats.Num; i++ {
		hg, cms, topN := newColGlobalStats.Hg[i], newColGlobalStats.Cms[i], newColGlobalStats.TopN[i]
		if hg == nil {
			// All partitions have no stats so global stats are not created.
			continue
		}
		// fms for global stats doesn't need to dump to kv.
		err = statsHandle.SaveStatsToStorage(tableID, newColGlobalStats.Count, newColGlobalStats.ModifyCount,
			0, hg, cms, topN, 2, 1, false, util.StatsMetaHistorySourceSchemaChange)
		if err != nil {
			return err
		}
	}

	// Generate the new index global-stats
	globalIdxStatsTopNNum, globalIdxStatsBucketNum := 0, 0
	for _, idx := range tblInfo.Indices {
		// An index listed in the table info may have no entry in the loaded
		// global stats; the map lookup then yields nil. Guard before reading
		// any field so such an index does not cause a nil-pointer panic. The
		// merge below is still attempted for it, using the options gathered
		// so far.
		if globalIdxStats := globalStats.Indices[idx.ID]; globalIdxStats != nil {
			if globalIdxStats.TopN != nil && len(globalIdxStats.TopN.TopN) > globalIdxStatsTopNNum {
				globalIdxStatsTopNNum = len(globalIdxStats.TopN.TopN)
			}
			if len(globalIdxStats.Buckets) > globalIdxStatsBucketNum {
				globalIdxStatsBucketNum = len(globalIdxStats.Buckets)
			}
		}
		if globalIdxStatsTopNNum != 0 {
			opts[ast.AnalyzeOptNumTopN] = uint64(globalIdxStatsTopNNum)
		}
		if globalIdxStatsBucketNum != 0 {
			opts[ast.AnalyzeOptNumBuckets] = uint64(globalIdxStatsBucketNum)
		}
		newIndexGlobalStats, err := MergePartitionStats2GlobalStats(sctx, statsHandle, opts, is, tblInfo, true, []int64{idx.ID})
		if err != nil {
			return err
		}
		if len(newIndexGlobalStats.MissingPartitionStats) > 0 {
			logutil.BgLogger().Warn("missing partition stats when merging global stats", zap.String("table", tblInfo.Name.L),
				zap.String("item", "index "+idx.Name.L), zap.Strings("missing", newIndexGlobalStats.MissingPartitionStats))
		}
		for i := 0; i < newIndexGlobalStats.Num; i++ {
			hg, cms, topN := newIndexGlobalStats.Hg[i], newIndexGlobalStats.Cms[i], newIndexGlobalStats.TopN[i]
			if hg == nil {
				// All partitions have no stats so global stats are not created.
				continue
			}
			// fms for global stats doesn't need to dump to kv.
			err = statsHandle.SaveStatsToStorage(tableID, newIndexGlobalStats.Count, newIndexGlobalStats.ModifyCount, 1, hg, cms, topN, 2, 1, false, util.StatsMetaHistorySourceSchemaChange)
			if err != nil {
				return err
			}
		}
	}
	return nil
}

// blockingMergePartitionStats2GlobalStats merge the partition-level stats to global-level stats based on the tableInfo.
// It is the old algorithm for merging partition-level stats into global-level stats. It may cause OOM because it loads all the partition-level stats into memory.
func blockingMergePartitionStats2GlobalStats(
Expand Down
16 changes: 3 additions & 13 deletions pkg/statistics/handle/globalstats/globalstats_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -903,32 +903,22 @@ func TestDDLPartition4GlobalStats(t *testing.T) {
globalStats := h.GetTableStats(tableInfo)
require.Equal(t, int64(15), globalStats.RealtimeCount)

tk.MustExec("alter table t drop partition p3, p5;")
require.NoError(t, h.DumpStatsDeltaToKV(true))
require.NoError(t, h.HandleDDLEvent(<-h.DDLEventCh()))
require.NoError(t, h.Update(is))
result = tk.MustQuery("show stats_meta where table_name = 't';").Rows()
require.Len(t, result, 5)
// The value of global.count will be updated automatically after we drop the table partition.
globalStats = h.GetTableStats(tableInfo)
require.Equal(t, int64(11), globalStats.RealtimeCount)

tk.MustExec("alter table t truncate partition p2, p4;")
require.NoError(t, h.DumpStatsDeltaToKV(true))
require.NoError(t, h.HandleDDLEvent(<-h.DDLEventCh()))
require.NoError(t, h.Update(is))
// The value of global.count will not be updated automatically when we truncate the table partition.
// Because the partition-stats in the partition table which have been truncated has not been updated.
globalStats = h.GetTableStats(tableInfo)
require.Equal(t, int64(11), globalStats.RealtimeCount)
require.Equal(t, int64(15), globalStats.RealtimeCount)

tk.MustExec("analyze table t;")
result = tk.MustQuery("show stats_meta where table_name = 't';").Rows()
// The truncate operation only delete the data from the partition p2 and p4. It will not delete the partition-stats.
require.Len(t, result, 5)
require.Len(t, result, 7)
// The result for the globalStats.count will be right now
globalStats = h.GetTableStats(tableInfo)
require.Equal(t, int64(7), globalStats.RealtimeCount)
require.Equal(t, int64(11), globalStats.RealtimeCount)
}

func TestGlobalStatsNDV(t *testing.T) {
Expand Down
3 changes: 0 additions & 3 deletions pkg/statistics/handle/types/interfaces.go
Original file line number Diff line number Diff line change
Expand Up @@ -348,9 +348,6 @@ type StatsGlobal interface {
isIndex bool,
histIDs []int64,
) (globalStats interface{}, err error)

// UpdateGlobalStats will trigger the merge of global-stats when we drop table partition
UpdateGlobalStats(tblInfo *model.TableInfo) error
}

// DDL is used to handle ddl events.
Expand Down

0 comments on commit ebca7ba

Please sign in to comment.