Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

statistics: do not directly update global stats when dropping a partition #48846

Merged
merged 2 commits into from
Nov 24, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 2 additions & 10 deletions pkg/statistics/handle/ddl/ddl.go
Original file line number Diff line number Diff line change
Expand Up @@ -116,16 +116,8 @@ func (h *ddlHandlerImpl) HandleDDLEvent(t *util.DDLEvent) error {
}
}
case model.ActionDropTablePartition:
pruneMode, err := util.GetCurrentPruneMode(h.statsHandler.SPool())
if err != nil {
return err
}
globalTableInfo, droppedPartitionInfo := t.GetDropPartitionInfo()
if variable.PartitionPruneMode(pruneMode) == variable.Dynamic && droppedPartitionInfo != nil {
if err := h.globalStatsHandler.UpdateGlobalStats(globalTableInfo); err != nil {
return err
}
}
// TODO: Update the modify count and count for the global table.
_, droppedPartitionInfo := t.GetDropPartitionInfo()
for _, def := range droppedPartitionInfo.Definitions {
if err := h.statsWriter.ResetTableStats2KVForDrop(def.ID); err != nil {
return err
Expand Down
1 change: 0 additions & 1 deletion pkg/statistics/handle/globalstats/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@ go_library(
"//pkg/parser/model",
"//pkg/sessionctx",
"//pkg/sessionctx/stmtctx",
"//pkg/sessiontxn",
"//pkg/statistics",
"//pkg/statistics/handle/logutil",
"//pkg/statistics/handle/storage",
Expand Down
111 changes: 0 additions & 111 deletions pkg/statistics/handle/globalstats/global_stats.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,10 +22,8 @@ import (
"github.com/pingcap/tidb/pkg/parser/ast"
"github.com/pingcap/tidb/pkg/parser/model"
"github.com/pingcap/tidb/pkg/sessionctx"
"github.com/pingcap/tidb/pkg/sessiontxn"
"github.com/pingcap/tidb/pkg/statistics"
statstypes "github.com/pingcap/tidb/pkg/statistics/handle/types"
"github.com/pingcap/tidb/pkg/statistics/handle/util"
"github.com/pingcap/tidb/pkg/types"
"github.com/pingcap/tidb/pkg/util/logutil"
"github.com/tiancaiamao/gp"
Expand Down Expand Up @@ -57,14 +55,6 @@ func (sg *statsGlobalImpl) MergePartitionStats2GlobalStatsByTableID(sc sessionct
return MergePartitionStats2GlobalStatsByTableID(sc, sg.statsHandler, opts, is, physicalID, isIndex, histIDs)
}

// UpdateGlobalStats triggers a re-merge of the global-level stats of tblInfo.
// It is used when a table partition is dropped.
//
// The merge itself is done by the package-level UpdateGlobalStats, which is
// invoked through util.CallWithSCtx with the stats handler's session pool.
// Whatever error that call produces is returned unchanged.
func (sg *statsGlobalImpl) UpdateGlobalStats(tblInfo *model.TableInfo) error {
	// In dynamic prune mode the partition-level stats have to be merged into
	// the global-stats again once a partition has been dropped.
	mergeWithSession := func(sctx sessionctx.Context) error {
		return UpdateGlobalStats(sctx, sg.statsHandler, tblInfo)
	}
	return util.CallWithSCtx(sg.statsHandler.SPool(), mergeWithSession)
}

// GlobalStats is used to store the statistics contained in the global-level stats
// which is generated by the merge of partition-level stats.
// It will both store the column stats and index stats.
Expand Down Expand Up @@ -170,107 +160,6 @@ var analyzeOptionDefault = map[ast.AnalyzeOptionType]uint64{
ast.AnalyzeOptNumTopN: 20,
}

// UpdateGlobalStats updates the global-level stats of tblInfo based on its
// partition-level stats and saves the merged result to storage.
//
// If no global-level stats can be loaded for the table, nothing is generated
// and nil is returned. The merge options start from analyzeOptionDefault; the
// TopN and bucket numbers are then overridden by the largest TopN/bucket
// counts found in the currently stored global stats. Column stats are merged
// in one pass, index stats are merged one index at a time.
//
// Columns or indexes that have no entry in the stored global stats are
// tolerated: they simply do not contribute to the TopN/bucket numbers.
//
// Any error from loading, merging or saving the stats is returned as is.
func UpdateGlobalStats(
	sctx sessionctx.Context,
	statsHandle statstypes.StatsHandle,
	tblInfo *model.TableInfo) error {
	tableID := tblInfo.ID
	is := sessiontxn.GetTxnManager(sctx).GetTxnInfoSchema()
	globalStats, err := statsHandle.TableStatsFromStorage(tblInfo, tableID, true, 0)
	if err != nil {
		return err
	}
	// If we do not currently have global-stats, no new global-stats will be generated.
	if globalStats == nil {
		return nil
	}
	opts := make(map[ast.AnalyzeOptionType]uint64, len(analyzeOptionDefault))
	for key, val := range analyzeOptionDefault {
		opts[key] = val
	}

	// Use current global-stats related information to construct the opts for `MergePartitionStats2GlobalStats` function.
	globalColStatsTopNNum, globalColStatsBucketNum := 0, 0
	for _, globalColStats := range globalStats.Columns {
		// Check the entry itself before touching any of its fields; reading
		// a field through a nil pointer would panic.
		if globalColStats == nil {
			continue
		}
		if topN := globalColStats.TopN; topN != nil && len(topN.TopN) > globalColStatsTopNNum {
			globalColStatsTopNNum = len(topN.TopN)
		}
		if len(globalColStats.Buckets) > globalColStatsBucketNum {
			globalColStatsBucketNum = len(globalColStats.Buckets)
		}
	}
	if globalColStatsTopNNum != 0 {
		opts[ast.AnalyzeOptNumTopN] = uint64(globalColStatsTopNNum)
	}
	if globalColStatsBucketNum != 0 {
		opts[ast.AnalyzeOptNumBuckets] = uint64(globalColStatsBucketNum)
	}

	// Generate the new column global-stats
	newColGlobalStats, err := MergePartitionStats2GlobalStats(sctx, statsHandle, opts, is, tblInfo, false, nil)
	if err != nil {
		return err
	}
	if len(newColGlobalStats.MissingPartitionStats) > 0 {
		logutil.BgLogger().Warn("missing partition stats when merging global stats", zap.String("table", tblInfo.Name.L),
			zap.String("item", "columns"), zap.Strings("missing", newColGlobalStats.MissingPartitionStats))
	}
	for i := 0; i < newColGlobalStats.Num; i++ {
		hg, cms, topN := newColGlobalStats.Hg[i], newColGlobalStats.Cms[i], newColGlobalStats.TopN[i]
		if hg == nil {
			// All partitions have no stats so global stats are not created.
			continue
		}
		// fms for global stats doesn't need to dump to kv.
		// NOTE(review): the literal 0 here versus 1 in the index branch below
		// presumably selects column vs. index stats — confirm against
		// SaveStatsToStorage.
		err = statsHandle.SaveStatsToStorage(tableID, newColGlobalStats.Count, newColGlobalStats.ModifyCount,
			0, hg, cms, topN, 2, 1, false, util.StatsMetaHistorySourceSchemaChange)
		if err != nil {
			return err
		}
	}

	// Generate the new index global-stats
	// The TopN/bucket maxima are intentionally kept across iterations (and
	// opts still carries the column-derived values), exactly as before.
	globalIdxStatsTopNNum, globalIdxStatsBucketNum := 0, 0
	for _, idx := range tblInfo.Indices {
		// An index without stored global stats yields a nil entry from the
		// map lookup, so guard before reading TopN or Buckets.
		if globalIdxStats := globalStats.Indices[idx.ID]; globalIdxStats != nil {
			if topN := globalIdxStats.TopN; topN != nil && len(topN.TopN) > globalIdxStatsTopNNum {
				globalIdxStatsTopNNum = len(topN.TopN)
			}
			if len(globalIdxStats.Buckets) > globalIdxStatsBucketNum {
				globalIdxStatsBucketNum = len(globalIdxStats.Buckets)
			}
		}
		if globalIdxStatsTopNNum != 0 {
			opts[ast.AnalyzeOptNumTopN] = uint64(globalIdxStatsTopNNum)
		}
		if globalIdxStatsBucketNum != 0 {
			opts[ast.AnalyzeOptNumBuckets] = uint64(globalIdxStatsBucketNum)
		}
		newIndexGlobalStats, err := MergePartitionStats2GlobalStats(sctx, statsHandle, opts, is, tblInfo, true, []int64{idx.ID})
		if err != nil {
			return err
		}
		if len(newIndexGlobalStats.MissingPartitionStats) > 0 {
			logutil.BgLogger().Warn("missing partition stats when merging global stats", zap.String("table", tblInfo.Name.L),
				zap.String("item", "index "+idx.Name.L), zap.Strings("missing", newIndexGlobalStats.MissingPartitionStats))
		}
		for i := 0; i < newIndexGlobalStats.Num; i++ {
			hg, cms, topN := newIndexGlobalStats.Hg[i], newIndexGlobalStats.Cms[i], newIndexGlobalStats.TopN[i]
			if hg == nil {
				// All partitions have no stats so global stats are not created.
				continue
			}
			// fms for global stats doesn't need to dump to kv.
			err = statsHandle.SaveStatsToStorage(tableID, newIndexGlobalStats.Count, newIndexGlobalStats.ModifyCount, 1, hg, cms, topN, 2, 1, false, util.StatsMetaHistorySourceSchemaChange)
			if err != nil {
				return err
			}
		}
	}
	return nil
}

// blockingMergePartitionStats2GlobalStats merges the partition-level stats into global-level stats based on the tableInfo.
// It is the old algorithm for merging partition-level stats into global-level stats. It can cause OOM because it loads all the partition-level stats into memory.
func blockingMergePartitionStats2GlobalStats(
Expand Down
16 changes: 3 additions & 13 deletions pkg/statistics/handle/globalstats/globalstats_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -903,32 +903,22 @@ func TestDDLPartition4GlobalStats(t *testing.T) {
globalStats := h.GetTableStats(tableInfo)
require.Equal(t, int64(15), globalStats.RealtimeCount)

tk.MustExec("alter table t drop partition p3, p5;")
require.NoError(t, h.DumpStatsDeltaToKV(true))
require.NoError(t, h.HandleDDLEvent(<-h.DDLEventCh()))
require.NoError(t, h.Update(is))
result = tk.MustQuery("show stats_meta where table_name = 't';").Rows()
require.Len(t, result, 5)
// The value of global.count will be updated automatically after we drop the table partition.
globalStats = h.GetTableStats(tableInfo)
require.Equal(t, int64(11), globalStats.RealtimeCount)

tk.MustExec("alter table t truncate partition p2, p4;")
require.NoError(t, h.DumpStatsDeltaToKV(true))
require.NoError(t, h.HandleDDLEvent(<-h.DDLEventCh()))
require.NoError(t, h.Update(is))
// The value of global.count will not be updated automatically when we truncate the table partition.
// Because the partition-stats in the partition table which have been truncated has not been updated.
globalStats = h.GetTableStats(tableInfo)
require.Equal(t, int64(11), globalStats.RealtimeCount)
require.Equal(t, int64(15), globalStats.RealtimeCount)

tk.MustExec("analyze table t;")
result = tk.MustQuery("show stats_meta where table_name = 't';").Rows()
// The truncate operation only delete the data from the partition p2 and p4. It will not delete the partition-stats.
require.Len(t, result, 5)
require.Len(t, result, 7)
// The result for the globalStats.count will be right now
globalStats = h.GetTableStats(tableInfo)
require.Equal(t, int64(7), globalStats.RealtimeCount)
require.Equal(t, int64(11), globalStats.RealtimeCount)
}

func TestGlobalStatsNDV(t *testing.T) {
Expand Down
3 changes: 0 additions & 3 deletions pkg/statistics/handle/types/interfaces.go
Original file line number Diff line number Diff line change
Expand Up @@ -348,9 +348,6 @@ type StatsGlobal interface {
isIndex bool,
histIDs []int64,
) (globalStats interface{}, err error)

// UpdateGlobalStats will trigger the merge of global-stats when we drop table partition
UpdateGlobalStats(tblInfo *model.TableInfo) error
}

// DDL is used to handle ddl events.
Expand Down