Skip to content

Commit

Permalink
manual cherry-pick pingcap#39011
Browse files Browse the repository at this point in the history
  • Loading branch information
time-and-fate committed Nov 9, 2022
1 parent 8473f27 commit bb2953e
Show file tree
Hide file tree
Showing 4 changed files with 20 additions and 26 deletions.
4 changes: 2 additions & 2 deletions statistics/handle/update.go
Original file line number Diff line number Diff line change
Expand Up @@ -1402,10 +1402,10 @@ func (h *Handle) RecalculateExpectCount(q *statistics.QueryFeedback) error {
expected := 0.0
if isIndex {
idx := t.Indices[id]
expected, err = idx.GetRowCount(sctx, nil, ranges, t.Count)
expected, err = idx.GetRowCount(sctx, nil, ranges, t.Count, t.ModifyCount)
} else {
c := t.Columns[id]
expected, err = c.GetColumnRowCount(sctx, ranges, t.Count, true)
expected, err = c.GetColumnRowCount(sctx, ranges, t.Count, t.ModifyCount, true)
}
q.Expected = int64(expected)
return err
Expand Down
24 changes: 8 additions & 16 deletions statistics/histogram.go
Original file line number Diff line number Diff line change
Expand Up @@ -876,7 +876,7 @@ func (hg *Histogram) outOfRange(val types.Datum) bool {

// outOfRangeRowCount estimates the row count of the part of [lDatum, rDatum] which is out of range of the histogram.
// Here we assume the density of data is decreasing from the lower/upper bound of the histogram toward outside.
// The maximum row count it can get is the increaseCount. It reaches the maximum when out-of-range width reaches histogram range width.
// The maximum row count it can get is the modifyCount. It reaches the maximum when out-of-range width reaches histogram range width.
// As it shows below. To calculate the out-of-range row count, we need to calculate the percentage of the shaded area.
// Note that we assume histL-boundL == histR-histL == boundR-histR here.
//
Expand All @@ -892,7 +892,7 @@ func (hg *Histogram) outOfRange(val types.Datum) bool {
// boundL │ │histL histR boundR
// │ │
// lDatum rDatum
func (hg *Histogram) outOfRangeRowCount(lDatum, rDatum *types.Datum, increaseCount int64) float64 {
func (hg *Histogram) outOfRangeRowCount(lDatum, rDatum *types.Datum, modifyCount int64) float64 {
if hg.Len() == 0 {
return 0
}
Expand Down Expand Up @@ -976,8 +976,8 @@ func (hg *Histogram) outOfRangeRowCount(lDatum, rDatum *types.Datum, increaseCou
totalPercent = 1
}
rowCount := totalPercent * hg.notNullCount()
if rowCount > float64(increaseCount) {
return float64(increaseCount)
if rowCount > float64(modifyCount) {
return float64(modifyCount)
}
return rowCount
}
Expand Down Expand Up @@ -1202,7 +1202,7 @@ func (c *Column) equalRowCount(sctx sessionctx.Context, val types.Datum, encoded
}

// GetColumnRowCount estimates the row count by a slice of Range.
func (c *Column) GetColumnRowCount(sctx sessionctx.Context, ranges []*ranger.Range, realtimeRowCount int64, pkIsHandle bool) (float64, error) {
func (c *Column) GetColumnRowCount(sctx sessionctx.Context, ranges []*ranger.Range, realtimeRowCount, modifyCount int64, pkIsHandle bool) (float64, error) {
sc := sctx.GetSessionVars().StmtCtx
var rowCount float64
for _, rg := range ranges {
Expand Down Expand Up @@ -1299,11 +1299,7 @@ func (c *Column) GetColumnRowCount(sctx sessionctx.Context, ranges []*ranger.Ran

// handling the out-of-range part
if (c.outOfRange(lowVal) && !lowVal.IsNull()) || c.outOfRange(highVal) {
increaseCount := realtimeRowCount - int64(c.TotalRowCount())
if increaseCount < 0 {
increaseCount = 0
}
cnt += c.Histogram.outOfRangeRowCount(&lowVal, &highVal, increaseCount)
cnt += c.Histogram.outOfRangeRowCount(&lowVal, &highVal, modifyCount)
}

rowCount += cnt
Expand Down Expand Up @@ -1426,7 +1422,7 @@ func (idx *Index) QueryBytes(d []byte) uint64 {

// GetRowCount returns the row count of the given ranges.
// It uses the modifyCount to adjust the influence of modifications on the table.
func (idx *Index) GetRowCount(sctx sessionctx.Context, coll *HistColl, indexRanges []*ranger.Range, realtimeRowCount int64) (float64, error) {
func (idx *Index) GetRowCount(sctx sessionctx.Context, coll *HistColl, indexRanges []*ranger.Range, realtimeRowCount, modifyCount int64) (float64, error) {
sc := sctx.GetSessionVars().StmtCtx
totalCount := float64(0)
isSingleCol := len(idx.Info.Columns) == 1
Expand Down Expand Up @@ -1518,11 +1514,7 @@ func (idx *Index) GetRowCount(sctx sessionctx.Context, coll *HistColl, indexRang

// handling the out-of-range part
if (idx.outOfRange(l) && !(isSingleCol && lowIsNull)) || idx.outOfRange(r) {
increaseCount := realtimeRowCount - int64(idx.TotalRowCount())
if increaseCount < 0 {
increaseCount = 0
}
totalCount += idx.Histogram.outOfRangeRowCount(&l, &r, increaseCount)
totalCount += idx.Histogram.outOfRangeRowCount(&l, &r, modifyCount)
}
}
totalCount = mathutil.Clamp(totalCount, 0, float64(realtimeRowCount))
Expand Down
8 changes: 5 additions & 3 deletions statistics/selectivity_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -128,7 +128,7 @@ func TestOutOfRangeEstimation(t *testing.T) {
statsTbl := h.GetTableStats(table.Meta())
sctx := mock.NewContext()
col := statsTbl.Columns[table.Meta().Columns[0].ID]
count, err := col.GetColumnRowCount(sctx, getRange(900, 900), statsTbl.Count, false)
count, err := col.GetColumnRowCount(sctx, getRange(900, 900), statsTbl.Count, statsTbl.ModifyCount, false)
require.NoError(t, err)
// Because ANALYZE collects data by random sampling, the result is not an exact value,
// so we use a range here.
Expand All @@ -147,8 +147,9 @@ func TestOutOfRangeEstimation(t *testing.T) {
statsSuiteData := statistics.GetStatsSuiteData()
statsSuiteData.GetTestCases(t, &input, &output)
increasedTblRowCount := int64(float64(statsTbl.Count) * 1.5)
modifyCount := int64(float64(statsTbl.Count) * 0.5)
for i, ran := range input {
count, err = col.GetColumnRowCount(sctx, getRange(ran.Start, ran.End), increasedTblRowCount, false)
count, err = col.GetColumnRowCount(sctx, getRange(ran.Start, ran.End), increasedTblRowCount, modifyCount, false)
require.NoError(t, err)
testdata.OnRecord(func() {
output[i].Start = ran.Start
Expand Down Expand Up @@ -551,6 +552,7 @@ func TestSelectivity(t *testing.T) {
require.Truef(t, math.Abs(ratio-tt.selectivity) < eps, "for %s, needed: %v, got: %v", tt.exprs, tt.selectivity, ratio)

histColl.Count *= 10
histColl.ModifyCount = histColl.Count * 9
ratio, _, err = histColl.Selectivity(sctx, sel.Conditions, nil)
require.NoErrorf(t, err, "for %s", tt.exprs)
require.Truef(t, math.Abs(ratio-tt.selectivityAfterIncrease) < eps, "for %s, needed: %v, got: %v", tt.exprs, tt.selectivityAfterIncrease, ratio)
Expand Down Expand Up @@ -762,7 +764,7 @@ func TestSmallRangeEstimation(t *testing.T) {
statsSuiteData := statistics.GetStatsSuiteData()
statsSuiteData.GetTestCases(t, &input, &output)
for i, ran := range input {
count, err := col.GetColumnRowCount(sctx, getRange(ran.Start, ran.End), statsTbl.Count, false)
count, err := col.GetColumnRowCount(sctx, getRange(ran.Start, ran.End), statsTbl.Count, statsTbl.ModifyCount, false)
require.NoError(t, err)
testdata.OnRecord(func() {
output[i].Start = ran.Start
Expand Down
10 changes: 5 additions & 5 deletions statistics/table.go
Original file line number Diff line number Diff line change
Expand Up @@ -466,7 +466,7 @@ func (coll *HistColl) GetRowCountByIntColumnRanges(sctx sessionctx.Context, colI
}
return result, nil
}
result, err := c.GetColumnRowCount(sctx, intRanges, coll.Count, true)
result, err := c.GetColumnRowCount(sctx, intRanges, coll.Count, coll.ModifyCount, true)
if sc.EnableOptimizerCETrace {
CETraceRange(sctx, coll.PhysicalID, []string{c.Info.Name.O}, intRanges, "Column Stats", uint64(result))
}
Expand All @@ -484,7 +484,7 @@ func (coll *HistColl) GetRowCountByColumnRanges(sctx sessionctx.Context, colID i
}
return result, err
}
result, err := c.GetColumnRowCount(sctx, colRanges, coll.Count, false)
result, err := c.GetColumnRowCount(sctx, colRanges, coll.Count, coll.ModifyCount, false)
if sc.EnableOptimizerCETrace {
CETraceRange(sctx, coll.PhysicalID, []string{c.Info.Name.O}, colRanges, "Column Stats", uint64(result))
}
Expand Down Expand Up @@ -517,7 +517,7 @@ func (coll *HistColl) GetRowCountByIndexRanges(sctx sessionctx.Context, idxID in
if idx.CMSketch != nil && idx.StatsVer == Version1 {
result, err = coll.getIndexRowCount(sctx, idxID, indexRanges)
} else {
result, err = idx.GetRowCount(sctx, coll, indexRanges, coll.Count)
result, err = idx.GetRowCount(sctx, coll, indexRanges, coll.Count, coll.ModifyCount)
}
if sc.EnableOptimizerCETrace {
CETraceRange(sctx, coll.PhysicalID, colNames, indexRanges, "Index Stats", uint64(result))
Expand Down Expand Up @@ -708,7 +708,7 @@ func (coll *HistColl) crossValidationSelectivity(sctx sessionctx.Context, idx *I
Collators: []collate.Collator{idxPointRange.Collators[i]},
}

rowCount, err := col.GetColumnRowCount(sctx, []*ranger.Range{&rang}, coll.Count, col.IsHandle)
rowCount, err := col.GetColumnRowCount(sctx, []*ranger.Range{&rang}, coll.Count, coll.ModifyCount, col.IsHandle)
if err != nil {
return 0, 0, err
}
Expand Down Expand Up @@ -780,7 +780,7 @@ func (coll *HistColl) getIndexRowCount(sctx sessionctx.Context, idxID int64, ind
// on single-column index, use previous way as well, because CMSketch does not contain null
// values in this case.
if rangePosition == 0 || isSingleColIdxNullRange(idx, ran) {
count, err := idx.GetRowCount(sctx, nil, []*ranger.Range{ran}, coll.Count)
count, err := idx.GetRowCount(sctx, nil, []*ranger.Range{ran}, coll.Count, coll.ModifyCount)
if err != nil {
return 0, errors.Trace(err)
}
Expand Down

0 comments on commit bb2953e

Please sign in to comment.