Skip to content

Commit

Permalink
planner: Use realtimeRowCount when all topN collected (#56848) (#57689)
Browse files Browse the repository at this point in the history
close #47400
  • Loading branch information
ti-chi-bot authored Nov 25, 2024
1 parent ed39273 commit 5bca037
Show file tree
Hide file tree
Showing 4 changed files with 87 additions and 5 deletions.
2 changes: 1 addition & 1 deletion pkg/planner/cardinality/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ go_test(
data = glob(["testdata/**"]),
embed = [":cardinality"],
flaky = True,
shard_count = 28,
shard_count = 29,
deps = [
"//pkg/config",
"//pkg/domain",
Expand Down
21 changes: 19 additions & 2 deletions pkg/planner/cardinality/row_count_column.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@
package cardinality

import (
"math"

"github.com/pingcap/errors"
"github.com/pingcap/tidb/pkg/planner/planctx"
"github.com/pingcap/tidb/pkg/planner/util/debugtrace"
Expand Down Expand Up @@ -173,12 +175,27 @@ func equalRowCountOnColumn(sctx planctx.PlanContext, c *statistics.Column, val t
// 3. use uniform distribution assumption for the rest (even when this value is not covered by the range of stats)
histNDV := float64(c.Histogram.NDV - int64(c.TopN.Num()))
if histNDV <= 0 {
// If the table hasn't been modified, it's safe to return 0. Otherwise, the TopN could be stale - return 1.
// If histNDV is zero - we have all NDV's in TopN - and no histograms. This function uses
// c.NotNullCount rather than c.Histogram.NotNullCount() since the histograms are empty.
//
// If the table hasn't been modified, it's safe to return 0.
if modifyCount == 0 {
return 0, nil
}
return 1, nil
// ELSE calculate an approximate estimate based upon newly inserted rows.
//
// Reset to the original NDV, or if no NDV - derive an NDV using sqrt
if c.Histogram.NDV > 0 {
histNDV = float64(c.Histogram.NDV)
} else {
histNDV = math.Sqrt(max(c.NotNullCount(), float64(realtimeRowCount)))
}
// As a conservative estimate - take the smaller of the original totalRows or the additions.
// "realtimeRowCount - original count" is a better measure of inserts than modifyCount
totalRowCount := min(c.NotNullCount(), float64(realtimeRowCount)-c.NotNullCount())
return max(1, totalRowCount/histNDV), nil
}
// return the average histogram rows (which excludes topN) and NDV that excluded topN
return c.Histogram.NotNullCount() / histNDV, nil
}

Expand Down
19 changes: 17 additions & 2 deletions pkg/planner/cardinality/row_count_index.go
Original file line number Diff line number Diff line change
Expand Up @@ -415,12 +415,27 @@ func equalRowCountOnIndex(sctx planctx.PlanContext, idx *statistics.Index, b []b
// 3. use uniform distribution assumption for the rest (even when this value is not covered by the range of stats)
histNDV := float64(idx.Histogram.NDV - int64(idx.TopN.Num()))
if histNDV <= 0 {
// If the table hasn't been modified, it's safe to return 0. Otherwise, the TopN could be stale - return 1.
// If histNDV is zero - we have all NDV's in TopN - and no histograms. This function uses
// idx.TotalRowCount rather than idx.Histogram.NotNullCount() since the histograms are empty.
//
// If the table hasn't been modified, it's safe to return 0.
if modifyCount == 0 {
return 0
}
return 1
// ELSE calculate an approximate estimate based upon newly inserted rows.
//
// Reset to the original NDV, or if no NDV - derive an NDV using sqrt
if idx.Histogram.NDV > 0 {
histNDV = float64(idx.Histogram.NDV)
} else {
histNDV = math.Sqrt(max(idx.TotalRowCount(), float64(realtimeRowCount)))
}
// As a conservative estimate - take the smaller of the original totalRows or the additions.
// "realtimeRowCount - original count" is a better measure of inserts than modifyCount
totalRowCount := min(idx.TotalRowCount(), float64(realtimeRowCount)-idx.TotalRowCount())
return max(1, totalRowCount/histNDV)
}
// return the average histogram rows (which excludes topN) and NDV that excluded topN
return idx.Histogram.NotNullCount() / histNDV
}

Expand Down
50 changes: 50 additions & 0 deletions pkg/planner/cardinality/selectivity_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -289,6 +289,56 @@ func TestEstimationForUnknownValues(t *testing.T) {
require.Equal(t, 0.0, count)
}

// TestEstimationForUnknownValuesAfterModify checks the point-range row count
// estimate on a column for a value that is present in the analyzed data and
// for values that are not, first right after ANALYZE and then again after
// more rows have been inserted without re-analyzing the table.
func TestEstimationForUnknownValuesAfterModify(t *testing.T) {
	store, dom := testkit.CreateMockStoreAndDomain(t)
	testKit := testkit.NewTestKit(t, store)
	testKit.MustExec("use test")
	testKit.MustExec("drop table if exists t")
	testKit.MustExec("create table t(a int, key idx(a))")
	testKit.MustExec("set @@tidb_analyze_version=2")
	testKit.MustExec("set @@global.tidb_enable_auto_analyze='OFF'")
	// Load values 1..10 with 10 rows each: five single-row inserts followed by
	// an insert-select that doubles the rows holding that value.
	for i := 1; i <= 10; i++ {
		insertOne := fmt.Sprintf("insert into t values (%d)", i)
		for j := 0; j < 5; j++ {
			testKit.MustExec(insertOne)
		}
		testKit.MustExec(fmt.Sprintf("insert into t select a from t where a = %d", i))
	}
	testKit.MustExec("analyze table t")
	h := dom.StatsHandle()
	require.NoError(t, h.DumpStatsDeltaToKV(true))

	table, err := dom.InfoSchema().TableByName(context.Background(), pmodel.NewCIStr("test"), pmodel.NewCIStr("t"))
	require.NoError(t, err)
	statsTbl := h.GetTableStats(table.Meta())

	// A value that exists in the analyzed data: each value was inserted 10 times.
	sctx := mock.NewContext()
	col := statsTbl.GetCol(table.Meta().Columns[0].ID)
	count, err := cardinality.GetColumnRowCount(sctx, col, getRange(5, 5), statsTbl.RealtimeCount, statsTbl.ModifyCount, false)
	require.NoError(t, err)
	require.Equal(t, 10.0, count)

	// A value that was never inserted, queried before any further modification
	// of the table: the estimate is expected to be 1.0.
	count, err = cardinality.GetColumnRowCount(sctx, col, getRange(11, 11), statsTbl.RealtimeCount, statsTbl.ModifyCount, false)
	require.NoError(t, err)
	require.Equal(t, 1.0, count)

	// Add another 200 rows to the table (values 11..20) without re-analyzing.
	testKit.MustExec("insert into t select a+10 from t")
	testKit.MustExec("insert into t select a+10 from t where a <= 10")
	require.NoError(t, h.DumpStatsDeltaToKV(true))
	require.NoError(t, h.Update(context.Background(), dom.InfoSchema()))
	statsTblNew := h.GetTableStats(table.Meta())

	// The column stats object `col` predates the new rows, so 15 is still a
	// "not found" value; only the realtime and modify counts are refreshed.
	// The estimate should now land roughly between 10 and 40.
	count, err = cardinality.GetColumnRowCount(sctx, col, getRange(15, 15), statsTblNew.RealtimeCount, statsTblNew.ModifyCount, false)
	require.NoError(t, err)
	require.Truef(t, count < 41, "expected: between 10 to 40, got: %v", count)
	require.Truef(t, count > 9, "expected: between 10 to 40, got: %v", count)
}

func TestEstimationUniqueKeyEqualConds(t *testing.T) {
store, dom := testkit.CreateMockStoreAndDomain(t)
testKit := testkit.NewTestKit(t, store)
Expand Down

0 comments on commit 5bca037

Please sign in to comment.