Skip to content

Commit

Permalink
Planner: Estimate to recognize modifyCount when all TopN collected (#…
Browse files Browse the repository at this point in the history
  • Loading branch information
terry1purcell authored Aug 1, 2024
1 parent 194711a commit 2945819
Show file tree
Hide file tree
Showing 2 changed files with 18 additions and 10 deletions.
18 changes: 11 additions & 7 deletions pkg/planner/cardinality/row_count_column.go
Original file line number Diff line number Diff line change
Expand Up @@ -124,7 +124,7 @@ func GetRowCountByIntColumnRanges(sctx context.PlanContext, coll *statistics.His
}

// equalRowCountOnColumn estimates the row count of column c for rows equal to the given Datum (val, with encodedVal as its encoded form).
func equalRowCountOnColumn(sctx context.PlanContext, c *statistics.Column, val types.Datum, encodedVal []byte, realtimeRowCount int64) (result float64, err error) {
func equalRowCountOnColumn(sctx context.PlanContext, c *statistics.Column, val types.Datum, encodedVal []byte, realtimeRowCount, modifyCount int64) (result float64, err error) {
if sctx.GetSessionVars().StmtCtx.EnableOptimizerDebugTrace {
debugtrace.EnterContextCommon(sctx)
debugtrace.RecordAnyValuesWithNames(sctx, "Value", val.String(), "Encoded", encodedVal)
Expand Down Expand Up @@ -172,7 +172,11 @@ func equalRowCountOnColumn(sctx context.PlanContext, c *statistics.Column, val t
// 3. use uniform distribution assumption for the rest (even when this value is not covered by the range of stats)
histNDV := float64(c.Histogram.NDV - int64(c.TopN.Num()))
if histNDV <= 0 {
return 0, nil
// If the table hasn't been modified, it's safe to return 0. Otherwise, the TopN could be stale - return 1.
if modifyCount == 0 {
return 0, nil
}
return 1, nil
}
return c.Histogram.NotNullCount() / histNDV, nil
}
Expand Down Expand Up @@ -224,7 +228,7 @@ func GetColumnRowCount(sctx context.PlanContext, c *statistics.Column, ranges []
continue
}
var cnt float64
cnt, err = equalRowCountOnColumn(sctx, c, lowVal, lowEncoded, realtimeRowCount)
cnt, err = equalRowCountOnColumn(sctx, c, lowVal, lowEncoded, realtimeRowCount, modifyCount)
if err != nil {
return 0, errors.Trace(err)
}
Expand All @@ -245,7 +249,7 @@ func GetColumnRowCount(sctx context.PlanContext, c *statistics.Column, ranges []
// case 2: it's a small range && using ver1 stats
if rangeVals != nil {
for _, val := range rangeVals {
cnt, err := equalRowCountOnColumn(sctx, c, val, lowEncoded, realtimeRowCount)
cnt, err := equalRowCountOnColumn(sctx, c, val, lowEncoded, realtimeRowCount, modifyCount)
if err != nil {
return 0, err
}
Expand All @@ -269,7 +273,7 @@ func GetColumnRowCount(sctx context.PlanContext, c *statistics.Column, ranges []
// And because we use (2, MaxValue] to represent expressions like a > 2 and use [MinNotNull, 3) to represent
// expressions like b < 3, we need to exclude the special values.
if rg.LowExclude && !lowVal.IsNull() && lowVal.Kind() != types.KindMaxValue && lowVal.Kind() != types.KindMinNotNull {
lowCnt, err := equalRowCountOnColumn(sctx, c, lowVal, lowEncoded, realtimeRowCount)
lowCnt, err := equalRowCountOnColumn(sctx, c, lowVal, lowEncoded, realtimeRowCount, modifyCount)
if err != nil {
return 0, errors.Trace(err)
}
Expand All @@ -280,7 +284,7 @@ func GetColumnRowCount(sctx context.PlanContext, c *statistics.Column, ranges []
cnt += float64(c.NullCount)
}
if !rg.HighExclude && highVal.Kind() != types.KindMaxValue && highVal.Kind() != types.KindMinNotNull {
highCnt, err := equalRowCountOnColumn(sctx, c, highVal, highEncoded, realtimeRowCount)
highCnt, err := equalRowCountOnColumn(sctx, c, highVal, highEncoded, realtimeRowCount, modifyCount)
if err != nil {
return 0, errors.Trace(err)
}
Expand Down Expand Up @@ -376,7 +380,7 @@ func ColumnEqualRowCount(sctx context.PlanContext, t *statistics.Table, value ty
if err != nil {
return 0, err
}
result, err := equalRowCountOnColumn(sctx, c, value, encodedVal, t.ModifyCount)
result, err := equalRowCountOnColumn(sctx, c, value, encodedVal, t.RealtimeCount, t.ModifyCount)
result *= c.GetIncreaseFactor(t.RealtimeCount)
return result, errors.Trace(err)
}
10 changes: 7 additions & 3 deletions pkg/planner/cardinality/row_count_index.go
Original file line number Diff line number Diff line change
Expand Up @@ -256,7 +256,7 @@ func getIndexRowCountForStatsV2(sctx context.PlanContext, idx *statistics.Index,
}
continue
}
count = equalRowCountOnIndex(sctx, idx, lb, realtimeRowCount)
count = equalRowCountOnIndex(sctx, idx, lb, realtimeRowCount, modifyCount)
// If the current table row count has changed, we should scale the row count accordingly.
count *= idx.GetIncreaseFactor(realtimeRowCount)
if debugTrace {
Expand Down Expand Up @@ -356,7 +356,7 @@ func getIndexRowCountForStatsV2(sctx context.PlanContext, idx *statistics.Index,

var nullKeyBytes, _ = codec.EncodeKey(time.UTC, nil, types.NewDatum(nil))

func equalRowCountOnIndex(sctx context.PlanContext, idx *statistics.Index, b []byte, realtimeRowCount int64) (result float64) {
func equalRowCountOnIndex(sctx context.PlanContext, idx *statistics.Index, b []byte, realtimeRowCount, modifyCount int64) (result float64) {
if sctx.GetSessionVars().StmtCtx.EnableOptimizerDebugTrace {
debugtrace.EnterContextCommon(sctx)
debugtrace.RecordAnyValuesWithNames(sctx, "Encoded Value", b)
Expand Down Expand Up @@ -397,7 +397,11 @@ func equalRowCountOnIndex(sctx context.PlanContext, idx *statistics.Index, b []b
// 3. use uniform distribution assumption for the rest (even when this value is not covered by the range of stats)
histNDV := float64(idx.Histogram.NDV - int64(idx.TopN.Num()))
if histNDV <= 0 {
return 0
// If the table hasn't been modified, it's safe to return 0. Otherwise, the TopN could be stale - return 1.
if modifyCount == 0 {
return 0
}
return 1
}
return idx.Histogram.NotNullCount() / histNDV
}
Expand Down

0 comments on commit 2945819

Please sign in to comment.