Skip to content

Commit

Permalink
planner: Use realtimeRowCount when all topN collected (#56848) (#57224)
Browse files Browse the repository at this point in the history
close #47400
  • Loading branch information
ti-chi-bot authored Nov 12, 2024
1 parent 1bc86a2 commit 2d457ca
Show file tree
Hide file tree
Showing 2 changed files with 36 additions and 2 deletions.
19 changes: 18 additions & 1 deletion statistics/column.go
Original file line number Diff line number Diff line change
Expand Up @@ -206,7 +206,24 @@ func (c *Column) equalRowCount(sctx sessionctx.Context, val types.Datum, encoded
// 3. use uniform distribution assumption for the rest (even when this value is not covered by the range of stats)
histNDV := float64(c.Histogram.NDV - int64(c.TopN.Num()))
if histNDV <= 0 {
return 0, nil
// If there have been no modifications, return zero.
modifiedRows := float64(realtimeRowCount) - c.TotalRowCount()
if modifiedRows == 0 {
return 0, nil
} else if modifiedRows < 0 {
modifiedRows = float64(realtimeRowCount)
}
// ELSE calculate an approximate estimate based upon newly inserted rows.
//
// Reset to the original NDV, or if no NDV - derive an NDV using sqrt
if c.Histogram.NDV > 0 {
histNDV = float64(c.Histogram.NDV)
} else {
histNDV = math.Sqrt(math.Min(c.TotalRowCount(), modifiedRows))
}
// As a conservative estimate, take the smaller of the original totalRows and the additions.
totalRowCount := math.Min(c.TotalRowCount(), modifiedRows)
return math.Max(1, totalRowCount/histNDV), nil
}
return c.Histogram.notNullCount() / histNDV, nil
}
Expand Down
19 changes: 18 additions & 1 deletion statistics/index.go
Original file line number Diff line number Diff line change
Expand Up @@ -218,7 +218,24 @@ func (idx *Index) equalRowCount(sctx sessionctx.Context, b []byte, realtimeRowCo
// 3. use uniform distribution assumption for the rest (even when this value is not covered by the range of stats)
histNDV := float64(idx.Histogram.NDV - int64(idx.TopN.Num()))
if histNDV <= 0 {
return 0
// If there have been no modifications, return zero.
modifiedRows := float64(realtimeRowCount) - idx.TotalRowCount()
if modifiedRows == 0 {
return 0
} else if modifiedRows < 0 {
modifiedRows = float64(realtimeRowCount)
}
// ELSE calculate an approximate estimate based upon newly inserted rows.
//
// Reset to the original NDV, or if no NDV - derive an NDV using sqrt
if idx.Histogram.NDV > 0 {
histNDV = float64(idx.Histogram.NDV)
} else {
histNDV = math.Sqrt(math.Min(idx.TotalRowCount(), modifiedRows))
}
// As a conservative estimate, take the smaller of the original totalRows and the additions.
totalRowCount := math.Min(idx.TotalRowCount(), modifiedRows)
return math.Max(1, totalRowCount/histNDV)
}
return idx.Histogram.notNullCount() / histNDV
}
Expand Down

0 comments on commit 2d457ca

Please sign in to comment.