diff --git a/executor/analyze.go b/executor/analyze.go index f3adc7f2bb33b..dd5c20440638e 100644 --- a/executor/analyze.go +++ b/executor/analyze.go @@ -592,7 +592,8 @@ func finishJobWithLog(sctx sessionctx.Context, job *statistics.AnalyzeJob, analy zap.String("job info", job.JobInfo), zap.Time("start time", job.StartTime), zap.Time("end time", job.EndTime), - zap.String("cost", job.EndTime.Sub(job.StartTime).String())) + zap.String("cost", job.EndTime.Sub(job.StartTime).String()), + zap.String("sample rate reason", job.SampleRateReason)) } } diff --git a/executor/analyze_test.go b/executor/analyze_test.go index a6cdea833df50..ce5e9b4a9130a 100644 --- a/executor/analyze_test.go +++ b/executor/analyze_test.go @@ -433,8 +433,8 @@ func TestMergeGlobalStatsWithUnAnalyzedPartition(t *testing.T) { tk.MustExec("analyze table t partition p2 index idxc;") tk.MustQuery("show warnings").Check(testkit.Rows( "Warning 1105 The version 2 would collect all statistics not only the selected indexes", - "Note 1105 Analyze use auto adjusted sample rate 1.000000 for table test.t's partition p2")) + "Note 1105 Analyze use auto adjusted sample rate 1.000000 for table test.t's partition p2, reason to use this rate is \"use min(1, 110000/10000) as the sample-rate=1\"")) tk.MustExec("analyze table t partition p0;") tk.MustQuery("show warnings").Check(testkit.Rows( - "Note 1105 Analyze use auto adjusted sample rate 1.000000 for table test.t's partition p0")) + "Note 1105 Analyze use auto adjusted sample rate 1.000000 for table test.t's partition p0, reason to use this rate is \"use min(1, 110000/2) as the sample-rate=1\"")) } diff --git a/executor/analyzetest/analyze_test.go b/executor/analyzetest/analyze_test.go index d052c8b950873..cae564758b6be 100644 --- a/executor/analyzetest/analyze_test.go +++ b/executor/analyzetest/analyze_test.go @@ -668,14 +668,14 @@ func TestAdjustSampleRateNote(t *testing.T) { result := tk.MustQuery("show stats_meta where table_name = 't'") require.Equal(t, 
"220000", result.Rows()[0][5]) tk.MustExec("analyze table t") - tk.MustQuery("show warnings").Check(testkit.Rows("Note 1105 Analyze use auto adjusted sample rate 0.500000 for table test.t")) + tk.MustQuery("show warnings").Check(testkit.Rows("Note 1105 Analyze use auto adjusted sample rate 0.500000 for table test.t, reason to use this rate is \"use min(1, 110000/220000) as the sample-rate=0.5\"")) tk.MustExec("insert into t values(1),(1),(1)") require.NoError(t, statsHandle.DumpStatsDeltaToKV(handle.DumpAll)) require.NoError(t, statsHandle.Update(is)) result = tk.MustQuery("show stats_meta where table_name = 't'") require.Equal(t, "3", result.Rows()[0][5]) tk.MustExec("analyze table t") - tk.MustQuery("show warnings").Check(testkit.Rows("Note 1105 Analyze use auto adjusted sample rate 1.000000 for table test.t")) + tk.MustQuery("show warnings").Check(testkit.Rows("Note 1105 Analyze use auto adjusted sample rate 1.000000 for table test.t, reason to use this rate is \"use min(1, 110000/3) as the sample-rate=1\"")) } func TestFastAnalyze4GlobalStats(t *testing.T) { @@ -924,7 +924,7 @@ func TestSmallTableAnalyzeV2(t *testing.T) { tk.MustExec("create table small_table_inject_pd(a int)") tk.MustExec("insert into small_table_inject_pd values(1), (2), (3), (4), (5)") tk.MustExec("analyze table small_table_inject_pd") - tk.MustQuery("show warnings").Check(testkit.Rows("Note 1105 Analyze use auto adjusted sample rate 1.000000 for table test.small_table_inject_pd")) + tk.MustQuery("show warnings").Check(testkit.Rows("Note 1105 Analyze use auto adjusted sample rate 1.000000 for table test.small_table_inject_pd, reason to use this rate is \"use min(1, 110000/10000) as the sample-rate=1\"")) tk.MustExec(` create table small_table_inject_pd_with_partition( a int @@ -936,9 +936,9 @@ create table small_table_inject_pd_with_partition( tk.MustExec("insert into small_table_inject_pd_with_partition values(1), (6), (11)") tk.MustExec("analyze table small_table_inject_pd_with_partition") 
tk.MustQuery("show warnings").Check(testkit.Rows( - "Note 1105 Analyze use auto adjusted sample rate 1.000000 for table test.small_table_inject_pd_with_partition's partition p0", - "Note 1105 Analyze use auto adjusted sample rate 1.000000 for table test.small_table_inject_pd_with_partition's partition p1", - "Note 1105 Analyze use auto adjusted sample rate 1.000000 for table test.small_table_inject_pd_with_partition's partition p2", + "Note 1105 Analyze use auto adjusted sample rate 1.000000 for table test.small_table_inject_pd_with_partition's partition p0, reason to use this rate is \"use min(1, 110000/10000) as the sample-rate=1\"", + "Note 1105 Analyze use auto adjusted sample rate 1.000000 for table test.small_table_inject_pd_with_partition's partition p1, reason to use this rate is \"use min(1, 110000/10000) as the sample-rate=1\"", + "Note 1105 Analyze use auto adjusted sample rate 1.000000 for table test.small_table_inject_pd_with_partition's partition p2, reason to use this rate is \"use min(1, 110000/10000) as the sample-rate=1\"", )) rows := [][]interface{}{ {"global", "a"}, @@ -1435,7 +1435,7 @@ func TestAnalyzeColumnsWithPrimaryKey(t *testing.T) { case model.ColumnList: tk.MustExec("analyze table t columns a with 2 topn, 2 buckets") tk.MustQuery("show warnings").Sort().Check(testkit.Rows( - "Note 1105 Analyze use auto adjusted sample rate 1.000000 for table test.t", + "Note 1105 Analyze use auto adjusted sample rate 1.000000 for table test.t, reason to use this rate is \"use min(1, 110000/10000) as the sample-rate=1\"", "Warning 1105 Columns c are missing in ANALYZE but their stats are needed for calculating stats for indexes/primary key/extended stats", )) case model.PredicateColumns: @@ -1503,7 +1503,7 @@ func TestAnalyzeColumnsWithIndex(t *testing.T) { case model.ColumnList: tk.MustExec("analyze table t columns c with 2 topn, 2 buckets") tk.MustQuery("show warnings").Sort().Check(testkit.Rows( - "Note 1105 Analyze use auto adjusted sample rate 
1.000000 for table test.t", + "Note 1105 Analyze use auto adjusted sample rate 1.000000 for table test.t, reason to use this rate is \"use min(1, 110000/10000) as the sample-rate=1\"", "Warning 1105 Columns b,d are missing in ANALYZE but their stats are needed for calculating stats for indexes/primary key/extended stats", )) case model.PredicateColumns: @@ -1580,7 +1580,7 @@ func TestAnalyzeColumnsWithClusteredIndex(t *testing.T) { case model.ColumnList: tk.MustExec("analyze table t columns c with 2 topn, 2 buckets") tk.MustQuery("show warnings").Sort().Check(testkit.Rows( - "Note 1105 Analyze use auto adjusted sample rate 1.000000 for table test.t", + "Note 1105 Analyze use auto adjusted sample rate 1.000000 for table test.t, reason to use this rate is \"use min(1, 110000/10000) as the sample-rate=1\"", "Warning 1105 Columns b,d are missing in ANALYZE but their stats are needed for calculating stats for indexes/primary key/extended stats", )) case model.PredicateColumns: @@ -1661,8 +1661,8 @@ func TestAnalyzeColumnsWithDynamicPartitionTable(t *testing.T) { case model.ColumnList: tk.MustExec("analyze table t columns a with 2 topn, 2 buckets") tk.MustQuery("show warnings").Sort().Check(testkit.Rows( - "Note 1105 Analyze use auto adjusted sample rate 1.000000 for table test.t's partition p0", - "Note 1105 Analyze use auto adjusted sample rate 1.000000 for table test.t's partition p1", + "Note 1105 Analyze use auto adjusted sample rate 1.000000 for table test.t's partition p0, reason to use this rate is \"use min(1, 110000/10000) as the sample-rate=1\"", + "Note 1105 Analyze use auto adjusted sample rate 1.000000 for table test.t's partition p1, reason to use this rate is \"use min(1, 110000/10000) as the sample-rate=1\"", "Warning 1105 Columns c are missing in ANALYZE but their stats are needed for calculating stats for indexes/primary key/extended stats", )) case model.PredicateColumns: @@ -1813,8 +1813,8 @@ func TestAnalyzeColumnsWithStaticPartitionTable(t 
*testing.T) { case model.ColumnList: tk.MustExec("analyze table t columns a with 2 topn, 2 buckets") tk.MustQuery("show warnings").Sort().Check(testkit.Rows( - "Note 1105 Analyze use auto adjusted sample rate 1.000000 for table test.t's partition p0", - "Note 1105 Analyze use auto adjusted sample rate 1.000000 for table test.t's partition p1", + "Note 1105 Analyze use auto adjusted sample rate 1.000000 for table test.t's partition p0, reason to use this rate is \"use min(1, 110000/10000) as the sample-rate=1\"", + "Note 1105 Analyze use auto adjusted sample rate 1.000000 for table test.t's partition p1, reason to use this rate is \"use min(1, 110000/10000) as the sample-rate=1\"", "Warning 1105 Columns c are missing in ANALYZE but their stats are needed for calculating stats for indexes/primary key/extended stats", )) case model.PredicateColumns: @@ -1918,7 +1918,7 @@ func TestAnalyzeColumnsWithExtendedStats(t *testing.T) { case model.ColumnList: tk.MustExec("analyze table t columns b with 2 topn, 2 buckets") tk.MustQuery("show warnings").Sort().Check(testkit.Rows( - "Note 1105 Analyze use auto adjusted sample rate 1.000000 for table test.t", + "Note 1105 Analyze use auto adjusted sample rate 1.000000 for table test.t, reason to use this rate is \"use min(1, 110000/10000) as the sample-rate=1\"", "Warning 1105 Columns c are missing in ANALYZE but their stats are needed for calculating stats for indexes/primary key/extended stats", )) case model.PredicateColumns: @@ -1988,7 +1988,7 @@ func TestAnalyzeColumnsWithVirtualColumnIndex(t *testing.T) { case model.ColumnList: tk.MustExec("analyze table t columns b with 2 topn, 2 buckets") tk.MustQuery("show warnings").Sort().Check(testkit.Rows( - "Note 1105 Analyze use auto adjusted sample rate 1.000000 for table test.t", + "Note 1105 Analyze use auto adjusted sample rate 1.000000 for table test.t, reason to use this rate is \"use min(1, 110000/10000) as the sample-rate=1\"", "Warning 1105 Columns c are missing in ANALYZE 
but their stats are needed for calculating stats for indexes/primary key/extended stats", )) case model.PredicateColumns: @@ -2110,6 +2110,26 @@ func TestAnalyzeColumnsAfterAnalyzeAll(t *testing.T) { } } +func TestAnalyzeSampleRateReason(t *testing.T) { + store, dom := testkit.CreateMockStoreAndDomain(t) + + tk := testkit.NewTestKit(t, store) + tk.MustExec("use test") + tk.MustExec("drop table if exists t") + tk.MustExec("create table t (a int, b int)") + require.NoError(t, dom.StatsHandle().DumpStatsDeltaToKV(handle.DumpAll)) + + tk.MustExec(`analyze table t`) + tk.MustQuery(`show warnings`).Sort().Check(testkit.Rows( + `Note 1105 Analyze use auto adjusted sample rate 1.000000 for table test.t, reason to use this rate is "use min(1, 110000/10000) as the sample-rate=1"`)) + + tk.MustExec(`insert into t values (1, 1), (2, 2), (3, 3)`) + require.NoError(t, dom.StatsHandle().DumpStatsDeltaToKV(handle.DumpAll)) + tk.MustExec(`analyze table t`) + tk.MustQuery(`show warnings`).Sort().Check(testkit.Rows( + `Note 1105 Analyze use auto adjusted sample rate 1.000000 for table test.t, reason to use this rate is "TiDB assumes that the table is empty and cannot get row count from PD, use sample-rate=1"`)) +} + func TestAnalyzeColumnsErrorAndWarning(t *testing.T) { store, dom := testkit.CreateMockStoreAndDomain(t) @@ -2134,7 +2154,7 @@ func TestAnalyzeColumnsErrorAndWarning(t *testing.T) { // If no predicate column is collected, analyze predicate columns gives a warning and falls back to analyze all columns. 
tk.MustExec("analyze table t predicate columns") tk.MustQuery("show warnings").Sort().Check(testkit.Rows( - "Note 1105 Analyze use auto adjusted sample rate 1.000000 for table test.t", + `Note 1105 Analyze use auto adjusted sample rate 1.000000 for table test.t, reason to use this rate is "use min(1, 110000/10000) as the sample-rate=1"`, "Warning 1105 No predicate column has been collected yet for table test.t so all columns are analyzed", )) rows := tk.MustQuery("show column_stats_usage where db_name = 'test' and table_name = 't' and last_analyzed_at is not null").Rows() @@ -2159,7 +2179,7 @@ func TestAnalyzeColumnsErrorAndWarning(t *testing.T) { tk.MustExec("analyze table t predicate columns") } tk.MustQuery("show warnings").Sort().Check(testkit.Rows( - "Note 1105 Analyze use auto adjusted sample rate 1.000000 for table test.t", + `Note 1105 Analyze use auto adjusted sample rate 1.000000 for table test.t, reason to use this rate is "TiDB assumes that the table is empty and cannot get row count from PD, use sample-rate=1"`, "Warning 1105 Table test.t has version 1 statistics so all the columns must be analyzed to overwrite the current statistics", )) }(val) @@ -2741,7 +2761,7 @@ PARTITION BY RANGE ( a ) ( // analyze partition with options under dynamic mode tk.MustExec("analyze table t partition p0 columns a,b,c with 1 topn, 3 buckets") tk.MustQuery("show warnings").Sort().Check(testkit.Rows( - "Note 1105 Analyze use auto adjusted sample rate 1.000000 for table test.t's partition p0", + "Note 1105 Analyze use auto adjusted sample rate 1.000000 for table test.t's partition p0, reason to use this rate is \"use min(1, 110000/10000) as the sample-rate=1\"", "Warning 1105 Ignore columns and options when analyze partition in dynamic mode", "Warning 8131 Build global-level stats failed due to missing partition-level stats: table `t` partition `p1`", "Warning 8131 Build global-level stats failed due to missing partition-level stats: table `t` partition `p1`", @@ -2755,7 
+2775,7 @@ PARTITION BY RANGE ( a ) ( tk.MustExec("analyze table t partition p0") tk.MustQuery("show warnings").Sort().Check(testkit.Rows( - "Note 1105 Analyze use auto adjusted sample rate 1.000000 for table test.t's partition p0", + "Note 1105 Analyze use auto adjusted sample rate 1.000000 for table test.t's partition p0, reason to use this rate is \"use min(1, 110000/9) as the sample-rate=1\"", "Warning 8131 Build global-level stats failed due to missing partition-level stats: table `t` partition `p1`", "Warning 8131 Build global-level stats failed due to missing partition-level stats: table `t` partition `p1`", )) @@ -2810,7 +2830,7 @@ PARTITION BY RANGE ( a ) ( tk.MustExec("set @@session.tidb_partition_prune_mode = 'dynamic'") tk.MustExec("analyze table t partition p1 columns a,b,d with 1 topn, 3 buckets") tk.MustQuery("show warnings").Sort().Check(testkit.Rows( - "Note 1105 Analyze use auto adjusted sample rate 1.000000 for table test.t's partition p1", + "Note 1105 Analyze use auto adjusted sample rate 1.000000 for table test.t's partition p1, reason to use this rate is \"use min(1, 110000/10000) as the sample-rate=1\"", "Warning 8244 Build global-level stats failed due to missing partition-level column stats: table `t` partition `p0` column `d`, please run analyze table to refresh columns of all partitions", )) @@ -2819,7 +2839,7 @@ PARTITION BY RANGE ( a ) ( tk.MustExec("set global tidb_persist_analyze_options = true") tk.MustExec("analyze table t partition p1 columns a,b,d with 1 topn, 3 buckets") tk.MustQuery("show warnings").Sort().Check(testkit.Rows( - "Note 1105 Analyze use auto adjusted sample rate 1.000000 for table test.t's partition p1", + "Note 1105 Analyze use auto adjusted sample rate 1.000000 for table test.t's partition p1, reason to use this rate is \"use min(1, 110000/5) as the sample-rate=1\"", "Warning 1105 Ignore columns and options when analyze partition in dynamic mode", "Warning 8244 Build global-level stats failed due to missing 
partition-level column stats: table `t` partition `p0` column `d`, please run analyze table to refresh columns of all partitions", )) @@ -2828,7 +2848,7 @@ PARTITION BY RANGE ( a ) ( tk.MustExec("insert into mysql.analyze_options values (?,?,?,?,?,?,?)", pi.Definitions[1].ID, 0, 0, 1, 1, "DEFAULT", "") tk.MustExec("analyze table t partition p1 columns a,b,d with 1 topn, 3 buckets") tk.MustQuery("show warnings").Sort().Check(testkit.Rows( - "Note 1105 Analyze use auto adjusted sample rate 1.000000 for table test.t's partition p1", + "Note 1105 Analyze use auto adjusted sample rate 1.000000 for table test.t's partition p1, reason to use this rate is \"use min(1, 110000/5) as the sample-rate=1\"", "Warning 1105 Ignore columns and options when analyze partition in dynamic mode", "Warning 8244 Build global-level stats failed due to missing partition-level column stats: table `t` partition `p0` column `d`, please run analyze table to refresh columns of all partitions", )) @@ -3233,7 +3253,7 @@ func TestAnalyzeColumnsSkipMVIndexJsonCol(t *testing.T) { tk.MustExec("analyze table t columns a") tk.MustQuery("show warnings").Sort().Check(testkit.Rows(""+ - "Note 1105 Analyze use auto adjusted sample rate 1.000000 for table test.t", + "Note 1105 Analyze use auto adjusted sample rate 1.000000 for table test.t, reason to use this rate is \"use min(1, 110000/10000) as the sample-rate=1\"", "Warning 1105 Columns b are missing in ANALYZE but their stats are needed for calculating stats for indexes/primary key/extended stats", "Warning 1105 analyzing multi-valued indexes is not supported, skip idx_c")) tk.MustQuery("select job_info from mysql.analyze_jobs where table_schema = 'test' and table_name = 't'").Check(testkit.Rows( diff --git a/executor/builder.go b/executor/builder.go index 96173435f1364..75afaa3f96ace 100644 --- a/executor/builder.go +++ b/executor/builder.go @@ -17,6 +17,7 @@ package executor import ( "bytes" "context" + "fmt" "math" "strconv" "strings" @@ -2599,32 
+2600,36 @@ func (b *executorBuilder) buildAnalyzeSamplingPushdown(task plannercore.AnalyzeC modifyCount = int64(val.(int)) }) sampleRate := new(float64) + var sampleRateReason string if opts[ast.AnalyzeOptNumSamples] == 0 { *sampleRate = math.Float64frombits(opts[ast.AnalyzeOptSampleRate]) if *sampleRate < 0 { - *sampleRate = b.getAdjustedSampleRate(b.ctx, task) + *sampleRate, sampleRateReason = b.getAdjustedSampleRate(b.ctx, task) if task.PartitionName != "" { sc.AppendNote(errors.Errorf( - "Analyze use auto adjusted sample rate %f for table %s.%s's partition %s", + `Analyze use auto adjusted sample rate %f for table %s.%s's partition %s, reason to use this rate is "%s"`, *sampleRate, task.DBName, task.TableName, task.PartitionName, + sampleRateReason, )) } else { sc.AppendNote(errors.Errorf( - "Analyze use auto adjusted sample rate %f for table %s.%s", + `Analyze use auto adjusted sample rate %f for table %s.%s, reason to use this rate is "%s"`, *sampleRate, task.DBName, task.TableName, + sampleRateReason, )) } } } job := &statistics.AnalyzeJob{ - DBName: task.DBName, - TableName: task.TableName, - PartitionName: task.PartitionName, + DBName: task.DBName, + TableName: task.TableName, + PartitionName: task.PartitionName, + SampleRateReason: sampleRateReason, } base := baseAnalyzeExec{ @@ -2681,11 +2686,11 @@ func (b *executorBuilder) buildAnalyzeSamplingPushdown(task plannercore.AnalyzeC // If we take n = 1e12, a 300*k sample still gives <= 0.66 bin size error with probability 0.99. // So if we don't consider the top-n values, we can keep the sample size at 300*256. // But we may take some top-n before building the histogram, so we increase the sample a little. 
-func (b *executorBuilder) getAdjustedSampleRate(sctx sessionctx.Context, task plannercore.AnalyzeColumnsTask) float64 { - statsHandle := domain.GetDomain(sctx).StatsHandle() +func (b *executorBuilder) getAdjustedSampleRate(sctx sessionctx.Context, task plannercore.AnalyzeColumnsTask) (sampleRate float64, reason string) { + statsHandle := domain.GetDomain(b.ctx).StatsHandle() defaultRate := 0.001 if statsHandle == nil { - return defaultRate + return defaultRate, fmt.Sprintf("statsHandler is nil, use the default-rate=%v", defaultRate) } var statsTbl *statistics.Table tid := task.TableID.GetStatisticsID() @@ -2697,11 +2702,11 @@ func (b *executorBuilder) getAdjustedSampleRate(sctx sessionctx.Context, task pl approxiCount, hasPD := b.getApproximateTableCountFromStorage(sctx, tid, task) // If there's no stats meta and no pd, return the default rate. if statsTbl == nil && !hasPD { - return defaultRate + return defaultRate, fmt.Sprintf("TiDB cannot get the row count of the table, use the default-rate=%v", defaultRate) } // If the count in stats_meta is still 0 and there's no information from pd side, we scan all rows. if statsTbl.RealtimeCount == 0 && !hasPD { - return 1 + return 1, "TiDB assumes that the table is empty and cannot get row count from PD, use sample-rate=1" } // we have issue https://github.com/pingcap/tidb/issues/29216. // To do a workaround for this issue, we check the approxiCount from the pd side to do a comparison. @@ -2710,15 +2715,17 @@ func (b *executorBuilder) getAdjustedSampleRate(sctx sessionctx.Context, task pl if float64(statsTbl.RealtimeCount*5) < approxiCount { // Confirmed by TiKV side, the experience error rate of the approximate count is about 20%. // So we increase the number to 150000 to reduce this error rate. 
- return math.Min(1, 150000/approxiCount) + sampleRate = math.Min(1, 150000/approxiCount) + return sampleRate, fmt.Sprintf("Row count in stats_meta is much smaller compared with the row count got by PD, use min(1, 150000/%v) as the sample-rate=%v", approxiCount, sampleRate) } // If we don't go into the above if branch and we still detect the count is zero. Return 1 to prevent the dividing zero. if statsTbl.RealtimeCount == 0 { - return 1 + return 1, "TiDB assumes that the table is empty, use sample-rate=1" } // We are expected to scan about 100000 rows or so. // Since there's tiny error rate around the count from the stats meta, we use 110000 to get a little big result - return math.Min(1, config.DefRowsForSampleRate/float64(statsTbl.RealtimeCount)) + sampleRate = math.Min(1, config.DefRowsForSampleRate/float64(statsTbl.RealtimeCount)) + return sampleRate, fmt.Sprintf("use min(1, %v/%v) as the sample-rate=%v", config.DefRowsForSampleRate, statsTbl.RealtimeCount, sampleRate) } func (b *executorBuilder) getApproximateTableCountFromStorage(sctx sessionctx.Context, tid int64, task plannercore.AnalyzeColumnsTask) (float64, bool) { diff --git a/executor/infoschema_reader_test.go b/executor/infoschema_reader_test.go index 67c3c87eac512..64b423f3dbe24 100644 --- a/executor/infoschema_reader_test.go +++ b/executor/infoschema_reader_test.go @@ -594,7 +594,7 @@ func TestForAnalyzeStatus(t *testing.T) { tk.MustExec("create table t1 (a int, b int, index idx(a))") tk.MustExec("insert into t1 values (1,2),(3,4)") tk.MustExec("analyze table t1") - tk.MustQuery("show warnings").Check(testkit.Rows("Note 1105 Analyze use auto adjusted sample rate 1.000000 for table test.t1")) // 1 note. + tk.MustQuery("show warnings").Check(testkit.Rows("Note 1105 Analyze use auto adjusted sample rate 1.000000 for table test.t1, reason to use this rate is \"use min(1, 110000/10000) as the sample-rate=1\"")) // 1 note. 
require.NoError(t, dom.StatsHandle().LoadNeededHistograms()) tk.MustExec("CREATE ROLE r_t1 ;") tk.MustExec("GRANT ALL PRIVILEGES ON test.t1 TO r_t1;") diff --git a/planner/core/indexmerge_path_test.go b/planner/core/indexmerge_path_test.go index e676b82d6f1a1..5efdce0b2cf1a 100644 --- a/planner/core/indexmerge_path_test.go +++ b/planner/core/indexmerge_path_test.go @@ -39,12 +39,12 @@ index idx2(a, b, (cast(j->'$.str' as char(10) array)), c))`) tk.MustExec("set tidb_analyze_version=2") tk.MustExec("analyze table t") tk.MustQuery("show warnings").Sort().Check(testkit.Rows( - "Note 1105 Analyze use auto adjusted sample rate 1.000000 for table test.t", + "Note 1105 Analyze use auto adjusted sample rate 1.000000 for table test.t, reason to use this rate is \"use min(1, 110000/10000) as the sample-rate=1\"", "Warning 1105 analyzing multi-valued indexes is not supported, skip idx", "Warning 1105 analyzing multi-valued indexes is not supported, skip idx2")) tk.MustExec("analyze table t index idx") tk.MustQuery("show warnings").Sort().Check(testkit.Rows( - "Note 1105 Analyze use auto adjusted sample rate 1.000000 for table test.t", + "Note 1105 Analyze use auto adjusted sample rate 1.000000 for table test.t, reason to use this rate is \"TiDB assumes that the table is empty and cannot get row count from PD, use sample-rate=1\"", "Warning 1105 The version 2 would collect all statistics not only the selected indexes", "Warning 1105 analyzing multi-valued indexes is not supported, skip idx", "Warning 1105 analyzing multi-valued indexes is not supported, skip idx2")) diff --git a/planner/core/integration_test.go b/planner/core/integration_test.go index b667281a4a0ca..dbaf58752ddfa 100644 --- a/planner/core/integration_test.go +++ b/planner/core/integration_test.go @@ -2952,7 +2952,7 @@ func TestIncrementalAnalyzeStatsVer2(t *testing.T) { require.Len(t, warns, 3) require.EqualError(t, warns[0].Err, "The version 2 would collect all statistics not only the selected indexes") 
require.EqualError(t, warns[1].Err, "The version 2 stats would ignore the INCREMENTAL keyword and do full sampling") - require.EqualError(t, warns[2].Err, "Analyze use auto adjusted sample rate 1.000000 for table test.t") + require.EqualError(t, warns[2].Err, "Analyze use auto adjusted sample rate 1.000000 for table test.t, reason to use this rate is \"use min(1, 110000/3) as the sample-rate=1\"") rows = tk.MustQuery(fmt.Sprintf("select distinct_count from mysql.stats_histograms where table_id = %d and is_index = 1", tblID)).Rows() require.Len(t, rows, 1) require.Equal(t, "6", rows[0][0]) diff --git a/statistics/analyze_jobs.go b/statistics/analyze_jobs.go index 34ae95be58a3c..4f8cb2af2e401 100644 --- a/statistics/analyze_jobs.go +++ b/statistics/analyze_jobs.go @@ -28,7 +28,9 @@ type AnalyzeJob struct { JobInfo string StartTime time.Time EndTime time.Time - Progress AnalyzeProgress + + Progress AnalyzeProgress + SampleRateReason string // why this sample-rate is chosen } // AnalyzeProgress represents the process of one analyze job. 
diff --git a/statistics/handle/handletest/handle_test.go b/statistics/handle/handletest/handle_test.go index df2562b1d2f4b..dd18db4b46ef7 100644 --- a/statistics/handle/handletest/handle_test.go +++ b/statistics/handle/handletest/handle_test.go @@ -2186,7 +2186,7 @@ func TestFMSWithAnalyzePartition(t *testing.T) { tk.MustQuery("select count(*) from mysql.stats_fm_sketch").Check(testkit.Rows("0")) tk.MustExec("analyze table t partition p0 with 1 topn, 2 buckets") tk.MustQuery("show warnings").Sort().Check(testkit.Rows( - "Note 1105 Analyze use auto adjusted sample rate 1.000000 for table test.t's partition p0", + "Note 1105 Analyze use auto adjusted sample rate 1.000000 for table test.t's partition p0, reason to use this rate is \"use min(1, 110000/10000) as the sample-rate=1\"", "Warning 1105 Ignore columns and options when analyze partition in dynamic mode", "Warning 8131 Build global-level stats failed due to missing partition-level stats: table `t` partition `p1`", "Warning 8131 Build global-level stats failed due to missing partition-level stats: table `t` partition `p1`", diff --git a/statistics/integration_test.go b/statistics/integration_test.go index ec6a9f0aaffb2..e998753560109 100644 --- a/statistics/integration_test.go +++ b/statistics/integration_test.go @@ -169,7 +169,7 @@ func TestChangeVerTo2BehaviorWithPersistedOptions(t *testing.T) { tk.MustExec("analyze table t index idx") tk.MustQuery("show warnings").Check(testkit.Rows("Warning 1105 The analyze version from the session is not compatible with the existing statistics of the table. 
Use the existing version instead", "Warning 1105 The version 2 would collect all statistics not only the selected indexes", - "Note 1105 Analyze use auto adjusted sample rate 1.000000 for table test.t")) // since fallback to ver2 path, should do samplerate adjustment + "Note 1105 Analyze use auto adjusted sample rate 1.000000 for table test.t, reason to use this rate is \"use min(1, 110000/3) as the sample-rate=1\"")) // since fallback to ver2 path, should do samplerate adjustment require.NoError(t, h.Update(is)) statsTblT = h.GetTableStats(tblT.Meta()) for _, idx := range statsTblT.Indices { @@ -178,7 +178,7 @@ func TestChangeVerTo2BehaviorWithPersistedOptions(t *testing.T) { tk.MustExec("analyze table t index") tk.MustQuery("show warnings").Check(testkit.Rows("Warning 1105 The analyze version from the session is not compatible with the existing statistics of the table. Use the existing version instead", "Warning 1105 The version 2 would collect all statistics not only the selected indexes", - "Note 1105 Analyze use auto adjusted sample rate 1.000000 for table test.t")) + "Note 1105 Analyze use auto adjusted sample rate 1.000000 for table test.t, reason to use this rate is \"use min(1, 110000/3) as the sample-rate=1\"")) require.NoError(t, h.Update(is)) statsTblT = h.GetTableStats(tblT.Meta()) for _, idx := range statsTblT.Indices {