Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Planner: Estimate to recognize modifyCount when all TopN collected (#55077) #58149

Open
wants to merge 4 commits into
base: release-8.1
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 11 additions & 7 deletions pkg/planner/cardinality/row_count_column.go
Original file line number Diff line number Diff line change
Expand Up @@ -124,7 +124,7 @@ func GetRowCountByIntColumnRanges(sctx context.PlanContext, coll *statistics.His
}

// equalRowCountOnColumn estimates the row count for rows whose column value equals the given Datum.
func equalRowCountOnColumn(sctx context.PlanContext, c *statistics.Column, val types.Datum, encodedVal []byte, realtimeRowCount int64) (result float64, err error) {
func equalRowCountOnColumn(sctx context.PlanContext, c *statistics.Column, val types.Datum, encodedVal []byte, realtimeRowCount, modifyCount int64) (result float64, err error) {
if sctx.GetSessionVars().StmtCtx.EnableOptimizerDebugTrace {
debugtrace.EnterContextCommon(sctx)
debugtrace.RecordAnyValuesWithNames(sctx, "Value", val.String(), "Encoded", encodedVal)
Expand Down Expand Up @@ -172,7 +172,11 @@ func equalRowCountOnColumn(sctx context.PlanContext, c *statistics.Column, val t
// 3. use uniform distribution assumption for the rest (even when this value is not covered by the range of stats)
histNDV := float64(c.Histogram.NDV - int64(c.TopN.Num()))
if histNDV <= 0 {
return 0, nil
// If the table hasn't been modified, it's safe to return 0. Otherwise, the TopN could be stale - return 1.
if modifyCount == 0 {
return 0, nil
}
return 1, nil
}
return c.Histogram.NotNullCount() / histNDV, nil
}
Expand Down Expand Up @@ -224,7 +228,7 @@ func GetColumnRowCount(sctx context.PlanContext, c *statistics.Column, ranges []
continue
}
var cnt float64
cnt, err = equalRowCountOnColumn(sctx, c, lowVal, lowEncoded, realtimeRowCount)
cnt, err = equalRowCountOnColumn(sctx, c, lowVal, lowEncoded, realtimeRowCount, modifyCount)
if err != nil {
return 0, errors.Trace(err)
}
Expand All @@ -245,7 +249,7 @@ func GetColumnRowCount(sctx context.PlanContext, c *statistics.Column, ranges []
// case 2: it's a small range && using ver1 stats
if rangeVals != nil {
for _, val := range rangeVals {
cnt, err := equalRowCountOnColumn(sctx, c, val, lowEncoded, realtimeRowCount)
cnt, err := equalRowCountOnColumn(sctx, c, val, lowEncoded, realtimeRowCount, modifyCount)
if err != nil {
return 0, err
}
Expand All @@ -269,7 +273,7 @@ func GetColumnRowCount(sctx context.PlanContext, c *statistics.Column, ranges []
// And because we use (2, MaxValue] to represent expressions like a > 2 and use [MinNotNull, 3) to represent
// expressions like b < 3, we need to exclude the special values.
if rg.LowExclude && !lowVal.IsNull() && lowVal.Kind() != types.KindMaxValue && lowVal.Kind() != types.KindMinNotNull {
lowCnt, err := equalRowCountOnColumn(sctx, c, lowVal, lowEncoded, realtimeRowCount)
lowCnt, err := equalRowCountOnColumn(sctx, c, lowVal, lowEncoded, realtimeRowCount, modifyCount)
if err != nil {
return 0, errors.Trace(err)
}
Expand All @@ -280,7 +284,7 @@ func GetColumnRowCount(sctx context.PlanContext, c *statistics.Column, ranges []
cnt += float64(c.NullCount)
}
if !rg.HighExclude && highVal.Kind() != types.KindMaxValue && highVal.Kind() != types.KindMinNotNull {
highCnt, err := equalRowCountOnColumn(sctx, c, highVal, highEncoded, realtimeRowCount)
highCnt, err := equalRowCountOnColumn(sctx, c, highVal, highEncoded, realtimeRowCount, modifyCount)
if err != nil {
return 0, errors.Trace(err)
}
Expand Down Expand Up @@ -376,7 +380,7 @@ func ColumnEqualRowCount(sctx context.PlanContext, t *statistics.Table, value ty
if err != nil {
return 0, err
}
result, err := equalRowCountOnColumn(sctx, c, value, encodedVal, t.ModifyCount)
result, err := equalRowCountOnColumn(sctx, c, value, encodedVal, t.RealtimeCount, t.ModifyCount)
result *= c.GetIncreaseFactor(t.RealtimeCount)
return result, errors.Trace(err)
}
10 changes: 7 additions & 3 deletions pkg/planner/cardinality/row_count_index.go
Original file line number Diff line number Diff line change
Expand Up @@ -263,7 +263,7 @@ func getIndexRowCountForStatsV2(sctx context.PlanContext, idx *statistics.Index,
}
continue
}
count = equalRowCountOnIndex(sctx, idx, lb, realtimeRowCount)
count = equalRowCountOnIndex(sctx, idx, lb, realtimeRowCount, modifyCount)
// If the current table row count has changed, we should scale the row count accordingly.
count *= idx.GetIncreaseFactor(realtimeRowCount)
if debugTrace {
Expand Down Expand Up @@ -363,7 +363,7 @@ func getIndexRowCountForStatsV2(sctx context.PlanContext, idx *statistics.Index,

var nullKeyBytes, _ = codec.EncodeKey(time.UTC, nil, types.NewDatum(nil))

func equalRowCountOnIndex(sctx context.PlanContext, idx *statistics.Index, b []byte, realtimeRowCount int64) (result float64) {
func equalRowCountOnIndex(sctx context.PlanContext, idx *statistics.Index, b []byte, realtimeRowCount, modifyCount int64) (result float64) {
if sctx.GetSessionVars().StmtCtx.EnableOptimizerDebugTrace {
debugtrace.EnterContextCommon(sctx)
debugtrace.RecordAnyValuesWithNames(sctx, "Encoded Value", b)
Expand Down Expand Up @@ -404,7 +404,11 @@ func equalRowCountOnIndex(sctx context.PlanContext, idx *statistics.Index, b []b
// 3. use uniform distribution assumption for the rest (even when this value is not covered by the range of stats)
histNDV := float64(idx.Histogram.NDV - int64(idx.TopN.Num()))
if histNDV <= 0 {
return 0
// If the table hasn't been modified, it's safe to return 0. Otherwise, the TopN could be stale - return 1.
if modifyCount == 0 {
return 0
}
return 1
}
return idx.Histogram.NotNullCount() / histNDV
}
Expand Down