Skip to content

Commit

Permalink
planner: log the reason why the sample-rate is chosen when analyzing table (#45938) (#45968)
Browse files Browse the repository at this point in the history

close #45936
  • Loading branch information
ti-chi-bot authored Oct 16, 2023
1 parent f628f52 commit 6de161e
Show file tree
Hide file tree
Showing 10 changed files with 78 additions and 48 deletions.
3 changes: 2 additions & 1 deletion executor/analyze.go
Original file line number Diff line number Diff line change
Expand Up @@ -592,7 +592,8 @@ func finishJobWithLog(sctx sessionctx.Context, job *statistics.AnalyzeJob, analy
zap.String("job info", job.JobInfo),
zap.Time("start time", job.StartTime),
zap.Time("end time", job.EndTime),
zap.String("cost", job.EndTime.Sub(job.StartTime).String()))
zap.String("cost", job.EndTime.Sub(job.StartTime).String()),
zap.String("sample rate reason", job.SampleRateReason))
}
}

Expand Down
4 changes: 2 additions & 2 deletions executor/analyze_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -433,8 +433,8 @@ func TestMergeGlobalStatsWithUnAnalyzedPartition(t *testing.T) {
tk.MustExec("analyze table t partition p2 index idxc;")
tk.MustQuery("show warnings").Check(testkit.Rows(
"Warning 1105 The version 2 would collect all statistics not only the selected indexes",
"Note 1105 Analyze use auto adjusted sample rate 1.000000 for table test.t's partition p2"))
"Note 1105 Analyze use auto adjusted sample rate 1.000000 for table test.t's partition p2, reason to use this rate is \"use min(1, 110000/10000) as the sample-rate=1\""))
tk.MustExec("analyze table t partition p0;")
tk.MustQuery("show warnings").Check(testkit.Rows(
"Note 1105 Analyze use auto adjusted sample rate 1.000000 for table test.t's partition p0"))
"Note 1105 Analyze use auto adjusted sample rate 1.000000 for table test.t's partition p0, reason to use this rate is \"use min(1, 110000/2) as the sample-rate=1\""))
}
66 changes: 43 additions & 23 deletions executor/analyzetest/analyze_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -668,14 +668,14 @@ func TestAdjustSampleRateNote(t *testing.T) {
result := tk.MustQuery("show stats_meta where table_name = 't'")
require.Equal(t, "220000", result.Rows()[0][5])
tk.MustExec("analyze table t")
tk.MustQuery("show warnings").Check(testkit.Rows("Note 1105 Analyze use auto adjusted sample rate 0.500000 for table test.t"))
tk.MustQuery("show warnings").Check(testkit.Rows("Note 1105 Analyze use auto adjusted sample rate 0.500000 for table test.t, reason to use this rate is \"use min(1, 110000/220000) as the sample-rate=0.5\""))
tk.MustExec("insert into t values(1),(1),(1)")
require.NoError(t, statsHandle.DumpStatsDeltaToKV(handle.DumpAll))
require.NoError(t, statsHandle.Update(is))
result = tk.MustQuery("show stats_meta where table_name = 't'")
require.Equal(t, "3", result.Rows()[0][5])
tk.MustExec("analyze table t")
tk.MustQuery("show warnings").Check(testkit.Rows("Note 1105 Analyze use auto adjusted sample rate 1.000000 for table test.t"))
tk.MustQuery("show warnings").Check(testkit.Rows("Note 1105 Analyze use auto adjusted sample rate 1.000000 for table test.t, reason to use this rate is \"use min(1, 110000/3) as the sample-rate=1\""))
}

func TestFastAnalyze4GlobalStats(t *testing.T) {
Expand Down Expand Up @@ -924,7 +924,7 @@ func TestSmallTableAnalyzeV2(t *testing.T) {
tk.MustExec("create table small_table_inject_pd(a int)")
tk.MustExec("insert into small_table_inject_pd values(1), (2), (3), (4), (5)")
tk.MustExec("analyze table small_table_inject_pd")
tk.MustQuery("show warnings").Check(testkit.Rows("Note 1105 Analyze use auto adjusted sample rate 1.000000 for table test.small_table_inject_pd"))
tk.MustQuery("show warnings").Check(testkit.Rows("Note 1105 Analyze use auto adjusted sample rate 1.000000 for table test.small_table_inject_pd, reason to use this rate is \"use min(1, 110000/10000) as the sample-rate=1\""))
tk.MustExec(`
create table small_table_inject_pd_with_partition(
a int
Expand All @@ -936,9 +936,9 @@ create table small_table_inject_pd_with_partition(
tk.MustExec("insert into small_table_inject_pd_with_partition values(1), (6), (11)")
tk.MustExec("analyze table small_table_inject_pd_with_partition")
tk.MustQuery("show warnings").Check(testkit.Rows(
"Note 1105 Analyze use auto adjusted sample rate 1.000000 for table test.small_table_inject_pd_with_partition's partition p0",
"Note 1105 Analyze use auto adjusted sample rate 1.000000 for table test.small_table_inject_pd_with_partition's partition p1",
"Note 1105 Analyze use auto adjusted sample rate 1.000000 for table test.small_table_inject_pd_with_partition's partition p2",
"Note 1105 Analyze use auto adjusted sample rate 1.000000 for table test.small_table_inject_pd_with_partition's partition p0, reason to use this rate is \"use min(1, 110000/10000) as the sample-rate=1\"",
"Note 1105 Analyze use auto adjusted sample rate 1.000000 for table test.small_table_inject_pd_with_partition's partition p1, reason to use this rate is \"use min(1, 110000/10000) as the sample-rate=1\"",
"Note 1105 Analyze use auto adjusted sample rate 1.000000 for table test.small_table_inject_pd_with_partition's partition p2, reason to use this rate is \"use min(1, 110000/10000) as the sample-rate=1\"",
))
rows := [][]interface{}{
{"global", "a"},
Expand Down Expand Up @@ -1435,7 +1435,7 @@ func TestAnalyzeColumnsWithPrimaryKey(t *testing.T) {
case model.ColumnList:
tk.MustExec("analyze table t columns a with 2 topn, 2 buckets")
tk.MustQuery("show warnings").Sort().Check(testkit.Rows(
"Note 1105 Analyze use auto adjusted sample rate 1.000000 for table test.t",
"Note 1105 Analyze use auto adjusted sample rate 1.000000 for table test.t, reason to use this rate is \"use min(1, 110000/10000) as the sample-rate=1\"",
"Warning 1105 Columns c are missing in ANALYZE but their stats are needed for calculating stats for indexes/primary key/extended stats",
))
case model.PredicateColumns:
Expand Down Expand Up @@ -1503,7 +1503,7 @@ func TestAnalyzeColumnsWithIndex(t *testing.T) {
case model.ColumnList:
tk.MustExec("analyze table t columns c with 2 topn, 2 buckets")
tk.MustQuery("show warnings").Sort().Check(testkit.Rows(
"Note 1105 Analyze use auto adjusted sample rate 1.000000 for table test.t",
"Note 1105 Analyze use auto adjusted sample rate 1.000000 for table test.t, reason to use this rate is \"use min(1, 110000/10000) as the sample-rate=1\"",
"Warning 1105 Columns b,d are missing in ANALYZE but their stats are needed for calculating stats for indexes/primary key/extended stats",
))
case model.PredicateColumns:
Expand Down Expand Up @@ -1580,7 +1580,7 @@ func TestAnalyzeColumnsWithClusteredIndex(t *testing.T) {
case model.ColumnList:
tk.MustExec("analyze table t columns c with 2 topn, 2 buckets")
tk.MustQuery("show warnings").Sort().Check(testkit.Rows(
"Note 1105 Analyze use auto adjusted sample rate 1.000000 for table test.t",
"Note 1105 Analyze use auto adjusted sample rate 1.000000 for table test.t, reason to use this rate is \"use min(1, 110000/10000) as the sample-rate=1\"",
"Warning 1105 Columns b,d are missing in ANALYZE but their stats are needed for calculating stats for indexes/primary key/extended stats",
))
case model.PredicateColumns:
Expand Down Expand Up @@ -1661,8 +1661,8 @@ func TestAnalyzeColumnsWithDynamicPartitionTable(t *testing.T) {
case model.ColumnList:
tk.MustExec("analyze table t columns a with 2 topn, 2 buckets")
tk.MustQuery("show warnings").Sort().Check(testkit.Rows(
"Note 1105 Analyze use auto adjusted sample rate 1.000000 for table test.t's partition p0",
"Note 1105 Analyze use auto adjusted sample rate 1.000000 for table test.t's partition p1",
"Note 1105 Analyze use auto adjusted sample rate 1.000000 for table test.t's partition p0, reason to use this rate is \"use min(1, 110000/10000) as the sample-rate=1\"",
"Note 1105 Analyze use auto adjusted sample rate 1.000000 for table test.t's partition p1, reason to use this rate is \"use min(1, 110000/10000) as the sample-rate=1\"",
"Warning 1105 Columns c are missing in ANALYZE but their stats are needed for calculating stats for indexes/primary key/extended stats",
))
case model.PredicateColumns:
Expand Down Expand Up @@ -1813,8 +1813,8 @@ func TestAnalyzeColumnsWithStaticPartitionTable(t *testing.T) {
case model.ColumnList:
tk.MustExec("analyze table t columns a with 2 topn, 2 buckets")
tk.MustQuery("show warnings").Sort().Check(testkit.Rows(
"Note 1105 Analyze use auto adjusted sample rate 1.000000 for table test.t's partition p0",
"Note 1105 Analyze use auto adjusted sample rate 1.000000 for table test.t's partition p1",
"Note 1105 Analyze use auto adjusted sample rate 1.000000 for table test.t's partition p0, reason to use this rate is \"use min(1, 110000/10000) as the sample-rate=1\"",
"Note 1105 Analyze use auto adjusted sample rate 1.000000 for table test.t's partition p1, reason to use this rate is \"use min(1, 110000/10000) as the sample-rate=1\"",
"Warning 1105 Columns c are missing in ANALYZE but their stats are needed for calculating stats for indexes/primary key/extended stats",
))
case model.PredicateColumns:
Expand Down Expand Up @@ -1918,7 +1918,7 @@ func TestAnalyzeColumnsWithExtendedStats(t *testing.T) {
case model.ColumnList:
tk.MustExec("analyze table t columns b with 2 topn, 2 buckets")
tk.MustQuery("show warnings").Sort().Check(testkit.Rows(
"Note 1105 Analyze use auto adjusted sample rate 1.000000 for table test.t",
"Note 1105 Analyze use auto adjusted sample rate 1.000000 for table test.t, reason to use this rate is \"use min(1, 110000/10000) as the sample-rate=1\"",
"Warning 1105 Columns c are missing in ANALYZE but their stats are needed for calculating stats for indexes/primary key/extended stats",
))
case model.PredicateColumns:
Expand Down Expand Up @@ -1988,7 +1988,7 @@ func TestAnalyzeColumnsWithVirtualColumnIndex(t *testing.T) {
case model.ColumnList:
tk.MustExec("analyze table t columns b with 2 topn, 2 buckets")
tk.MustQuery("show warnings").Sort().Check(testkit.Rows(
"Note 1105 Analyze use auto adjusted sample rate 1.000000 for table test.t",
"Note 1105 Analyze use auto adjusted sample rate 1.000000 for table test.t, reason to use this rate is \"use min(1, 110000/10000) as the sample-rate=1\"",
"Warning 1105 Columns c are missing in ANALYZE but their stats are needed for calculating stats for indexes/primary key/extended stats",
))
case model.PredicateColumns:
Expand Down Expand Up @@ -2110,6 +2110,26 @@ func TestAnalyzeColumnsAfterAnalyzeAll(t *testing.T) {
}
}

// TestAnalyzeSampleRateReason checks that the note produced by `analyze table`
// carries, in addition to the auto adjusted sample rate, the textual reason
// explaining why that rate was picked.
//
// Two situations are exercised on the same table test.t:
//  1. right after the table is created (no rows inserted yet), and
//  2. after three rows are inserted and the stats delta is dumped again.
//
// The expected reason string differs between the two runs; the exact texts
// asserted below are the contract of this test.
func TestAnalyzeSampleRateReason(t *testing.T) {
	store, dom := testkit.CreateMockStoreAndDomain(t)
	tk := testkit.NewTestKit(t, store)

	// flushDelta persists pending stats deltas and fails the test on error.
	flushDelta := func() {
		require.NoError(t, dom.StatsHandle().DumpStatsDeltaToKV(handle.DumpAll))
	}
	// analyzeAndExpect runs a plain analyze on t and asserts that the only
	// warning row reported afterwards is wantNote.
	analyzeAndExpect := func(wantNote string) {
		tk.MustExec(`analyze table t`)
		tk.MustQuery(`show warnings`).Sort().Check(testkit.Rows(wantNote))
	}

	// Prepare a fresh, empty table.
	for _, stmt := range []string{
		"use test",
		"drop table if exists t",
		"create table t (a int, b int)",
	} {
		tk.MustExec(stmt)
	}
	flushDelta()

	// First analyze: table has just been created.
	analyzeAndExpect(
		`Note 1105 Analyze use auto adjusted sample rate 1.000000 for table test.t, reason to use this rate is "use min(1, 110000/10000) as the sample-rate=1"`)

	// Second analyze: three rows inserted and the delta dumped beforehand.
	tk.MustExec(`insert into t values (1, 1), (2, 2), (3, 3)`)
	flushDelta()
	analyzeAndExpect(
		`Note 1105 Analyze use auto adjusted sample rate 1.000000 for table test.t, reason to use this rate is "TiDB assumes that the table is empty and cannot get row count from PD, use sample-rate=1"`)
}

func TestAnalyzeColumnsErrorAndWarning(t *testing.T) {
store, dom := testkit.CreateMockStoreAndDomain(t)

Expand All @@ -2134,7 +2154,7 @@ func TestAnalyzeColumnsErrorAndWarning(t *testing.T) {
// If no predicate column is collected, analyze predicate columns gives a warning and falls back to analyze all columns.
tk.MustExec("analyze table t predicate columns")
tk.MustQuery("show warnings").Sort().Check(testkit.Rows(
"Note 1105 Analyze use auto adjusted sample rate 1.000000 for table test.t",
`Note 1105 Analyze use auto adjusted sample rate 1.000000 for table test.t, reason to use this rate is "use min(1, 110000/10000) as the sample-rate=1"`,
"Warning 1105 No predicate column has been collected yet for table test.t so all columns are analyzed",
))
rows := tk.MustQuery("show column_stats_usage where db_name = 'test' and table_name = 't' and last_analyzed_at is not null").Rows()
Expand All @@ -2159,7 +2179,7 @@ func TestAnalyzeColumnsErrorAndWarning(t *testing.T) {
tk.MustExec("analyze table t predicate columns")
}
tk.MustQuery("show warnings").Sort().Check(testkit.Rows(
"Note 1105 Analyze use auto adjusted sample rate 1.000000 for table test.t",
`Note 1105 Analyze use auto adjusted sample rate 1.000000 for table test.t, reason to use this rate is "TiDB assumes that the table is empty and cannot get row count from PD, use sample-rate=1"`,
"Warning 1105 Table test.t has version 1 statistics so all the columns must be analyzed to overwrite the current statistics",
))
}(val)
Expand Down Expand Up @@ -2741,7 +2761,7 @@ PARTITION BY RANGE ( a ) (
// analyze partition with options under dynamic mode
tk.MustExec("analyze table t partition p0 columns a,b,c with 1 topn, 3 buckets")
tk.MustQuery("show warnings").Sort().Check(testkit.Rows(
"Note 1105 Analyze use auto adjusted sample rate 1.000000 for table test.t's partition p0",
"Note 1105 Analyze use auto adjusted sample rate 1.000000 for table test.t's partition p0, reason to use this rate is \"use min(1, 110000/10000) as the sample-rate=1\"",
"Warning 1105 Ignore columns and options when analyze partition in dynamic mode",
"Warning 8131 Build global-level stats failed due to missing partition-level stats: table `t` partition `p1`",
"Warning 8131 Build global-level stats failed due to missing partition-level stats: table `t` partition `p1`",
Expand All @@ -2755,7 +2775,7 @@ PARTITION BY RANGE ( a ) (

tk.MustExec("analyze table t partition p0")
tk.MustQuery("show warnings").Sort().Check(testkit.Rows(
"Note 1105 Analyze use auto adjusted sample rate 1.000000 for table test.t's partition p0",
"Note 1105 Analyze use auto adjusted sample rate 1.000000 for table test.t's partition p0, reason to use this rate is \"use min(1, 110000/9) as the sample-rate=1\"",
"Warning 8131 Build global-level stats failed due to missing partition-level stats: table `t` partition `p1`",
"Warning 8131 Build global-level stats failed due to missing partition-level stats: table `t` partition `p1`",
))
Expand Down Expand Up @@ -2810,7 +2830,7 @@ PARTITION BY RANGE ( a ) (
tk.MustExec("set @@session.tidb_partition_prune_mode = 'dynamic'")
tk.MustExec("analyze table t partition p1 columns a,b,d with 1 topn, 3 buckets")
tk.MustQuery("show warnings").Sort().Check(testkit.Rows(
"Note 1105 Analyze use auto adjusted sample rate 1.000000 for table test.t's partition p1",
"Note 1105 Analyze use auto adjusted sample rate 1.000000 for table test.t's partition p1, reason to use this rate is \"use min(1, 110000/10000) as the sample-rate=1\"",
"Warning 8244 Build global-level stats failed due to missing partition-level column stats: table `t` partition `p0` column `d`, please run analyze table to refresh columns of all partitions",
))

Expand All @@ -2819,7 +2839,7 @@ PARTITION BY RANGE ( a ) (
tk.MustExec("set global tidb_persist_analyze_options = true")
tk.MustExec("analyze table t partition p1 columns a,b,d with 1 topn, 3 buckets")
tk.MustQuery("show warnings").Sort().Check(testkit.Rows(
"Note 1105 Analyze use auto adjusted sample rate 1.000000 for table test.t's partition p1",
"Note 1105 Analyze use auto adjusted sample rate 1.000000 for table test.t's partition p1, reason to use this rate is \"use min(1, 110000/5) as the sample-rate=1\"",
"Warning 1105 Ignore columns and options when analyze partition in dynamic mode",
"Warning 8244 Build global-level stats failed due to missing partition-level column stats: table `t` partition `p0` column `d`, please run analyze table to refresh columns of all partitions",
))
Expand All @@ -2828,7 +2848,7 @@ PARTITION BY RANGE ( a ) (
tk.MustExec("insert into mysql.analyze_options values (?,?,?,?,?,?,?)", pi.Definitions[1].ID, 0, 0, 1, 1, "DEFAULT", "")
tk.MustExec("analyze table t partition p1 columns a,b,d with 1 topn, 3 buckets")
tk.MustQuery("show warnings").Sort().Check(testkit.Rows(
"Note 1105 Analyze use auto adjusted sample rate 1.000000 for table test.t's partition p1",
"Note 1105 Analyze use auto adjusted sample rate 1.000000 for table test.t's partition p1, reason to use this rate is \"use min(1, 110000/5) as the sample-rate=1\"",
"Warning 1105 Ignore columns and options when analyze partition in dynamic mode",
"Warning 8244 Build global-level stats failed due to missing partition-level column stats: table `t` partition `p0` column `d`, please run analyze table to refresh columns of all partitions",
))
Expand Down Expand Up @@ -3233,7 +3253,7 @@ func TestAnalyzeColumnsSkipMVIndexJsonCol(t *testing.T) {

tk.MustExec("analyze table t columns a")
tk.MustQuery("show warnings").Sort().Check(testkit.Rows(""+
"Note 1105 Analyze use auto adjusted sample rate 1.000000 for table test.t",
"Note 1105 Analyze use auto adjusted sample rate 1.000000 for table test.t, reason to use this rate is \"use min(1, 110000/10000) as the sample-rate=1\"",
"Warning 1105 Columns b are missing in ANALYZE but their stats are needed for calculating stats for indexes/primary key/extended stats",
"Warning 1105 analyzing multi-valued indexes is not supported, skip idx_c"))
tk.MustQuery("select job_info from mysql.analyze_jobs where table_schema = 'test' and table_name = 't'").Check(testkit.Rows(
Expand Down
Loading

0 comments on commit 6de161e

Please sign in to comment.