Skip to content

Commit

Permalink
stats: fix estimation for out of range point queries (#8015) (#8493)
Browse files: browse the repository at this point in the history
  • Loading branch information
alivxxx authored and zz-jason committed Nov 28, 2018
1 parent a1276e4 commit f5dd339
Show file tree
Hide file tree
Showing 5 changed files with 34 additions and 16 deletions.
8 changes: 4 additions & 4 deletions plan/cbo_test.go
Original file line number | Diff line number | Diff line change
Expand Up @@ -607,16 +607,16 @@ func (s *testAnalyzeSuite) TestLimit(c *C) {
}
testKit.MustExec("analyze table t")
testKit.MustQuery("explain select * from t use index(idx) where a > 1 and b > 1 and c > 1 limit 1").Check(testkit.Rows(
"IndexScan_13 Selection_15 cop table:t, index:a, b, range:(1 +inf,+inf +inf], keep order:false 1.10",
"Selection_15 IndexScan_13 cop gt(test.t.b, 1) 1.00",
"TableScan_14 Selection_16 cop table:t, keep order:false 1.00",
"IndexScan_13 Selection_15 cop table:t, index:a, b, range:(1 +inf,+inf +inf], keep order:false 1.56",
"Selection_15 IndexScan_13 cop gt(test.t.b, 1) 1.25",
"TableScan_14 Selection_16 cop table:t, keep order:false 1.25",
"Selection_16 Limit_17 TableScan_14 cop gt(test.t.c, 1) 1.00",
"Limit_17 Selection_16 cop offset:0, count:1 1.00",
"IndexLookUp_18 Limit_9 root index:Selection_15, table:Limit_17 1.00",
"Limit_9 IndexLookUp_18 root offset:0, count:1 1.00",
))
testKit.MustQuery("explain select * from t where a > 1 and c > 1 limit 1").Check(testkit.Rows(
"TableScan_11 Selection_12 cop table:t, range:(1,+inf], keep order:false 1.11",
"TableScan_11 Selection_12 cop table:t, range:(1,+inf], keep order:false 1.25",
"Selection_12 Limit_15 TableScan_11 cop gt(test.t.c, 1) 1.00",
"Limit_15 Selection_12 cop offset:0, count:1 1.00",
"TableReader_16 Limit_8 root data:Limit_15 1.00",
Expand Down
2 changes: 1 addition & 1 deletion statistics/ddl_test.go
Original file line number | Diff line number | Diff line change
Expand Up @@ -126,7 +126,7 @@ func (s *testStatsCacheSuite) TestDDLHistogram(c *C) {
c.Assert(count, Equals, float64(2))
count, err = statsTbl.ColumnEqualRowCount(sc, types.NewIntDatum(1), tableInfo.Columns[3].ID)
c.Assert(err, IsNil)
c.Assert(count, Equals, float64(2))
c.Assert(count, Equals, float64(0))

testKit.MustExec("alter table t add column c4 datetime NOT NULL default CURRENT_TIMESTAMP")
err = h.HandleDDLEvent(<-h.DDLEventCh())
Expand Down
16 changes: 8 additions & 8 deletions statistics/histogram.go
Original file line number | Diff line number | Diff line change
Expand Up @@ -637,7 +637,7 @@ func (c *Column) String() string {
return c.Histogram.ToString(0)
}

func (c *Column) equalRowCount(sc *stmtctx.StatementContext, val types.Datum) (float64, error) {
func (c *Column) equalRowCount(sc *stmtctx.StatementContext, val types.Datum, modifyCount int64) (float64, error) {
if val.IsNull() {
return float64(c.NullCount), nil
}
Expand All @@ -646,7 +646,7 @@ func (c *Column) equalRowCount(sc *stmtctx.StatementContext, val types.Datum) (f
return 0.0, nil
}
if c.NDV > 0 && c.outOfRange(val) {
return c.totalRowCount() / (float64(c.NDV)), nil
return float64(modifyCount) / float64(c.NDV), nil
}
if c.CMSketch != nil {
count, err := c.CMSketch.queryValue(sc, val)
Expand All @@ -667,7 +667,7 @@ func (c *Column) getColumnRowCount(sc *stmtctx.StatementContext, ranges []*range
// the point case.
if !rg.LowExclude && !rg.HighExclude {
var cnt float64
cnt, err = c.equalRowCount(sc, rg.LowVal[0])
cnt, err = c.equalRowCount(sc, rg.LowVal[0], modifyCount)
if err != nil {
return 0, errors.Trace(err)
}
Expand All @@ -681,14 +681,14 @@ func (c *Column) getColumnRowCount(sc *stmtctx.StatementContext, ranges []*range
cnt += float64(modifyCount) / outOfRangeBetweenRate
}
if rg.LowExclude {
lowCnt, err := c.equalRowCount(sc, rg.LowVal[0])
lowCnt, err := c.equalRowCount(sc, rg.LowVal[0], modifyCount)
if err != nil {
return 0, errors.Trace(err)
}
cnt -= lowCnt
}
if !rg.HighExclude {
highCnt, err := c.equalRowCount(sc, rg.HighVal[0])
highCnt, err := c.equalRowCount(sc, rg.HighVal[0], modifyCount)
if err != nil {
return 0, errors.Trace(err)
}
Expand All @@ -715,10 +715,10 @@ func (idx *Index) String() string {
return idx.Histogram.ToString(len(idx.Info.Columns))
}

func (idx *Index) equalRowCount(sc *stmtctx.StatementContext, b []byte) float64 {
func (idx *Index) equalRowCount(sc *stmtctx.StatementContext, b []byte, modifyCount int64) float64 {
val := types.NewBytesDatum(b)
if idx.NDV > 0 && idx.outOfRange(val) {
return idx.totalRowCount() / (float64(idx.NDV))
return float64(modifyCount) / (float64(idx.NDV))
}
if idx.CMSketch != nil {
return float64(idx.CMSketch.queryBytes(b))
Expand All @@ -740,7 +740,7 @@ func (idx *Index) getRowCount(sc *stmtctx.StatementContext, indexRanges []*range
fullLen := len(indexRange.LowVal) == len(indexRange.HighVal) && len(indexRange.LowVal) == len(idx.Info.Columns)
if fullLen && bytes.Equal(lb, rb) {
if !indexRange.LowExclude && !indexRange.HighExclude {
totalCount += idx.equalRowCount(sc, lb)
totalCount += idx.equalRowCount(sc, lb, modifyCount)
}
continue
}
Expand Down
22 changes: 20 additions & 2 deletions statistics/selectivity_test.go
Original file line number | Diff line number | Diff line change
Expand Up @@ -154,7 +154,7 @@ func (s *testSelectivitySuite) TestSelectivity(c *C) {
},
{
exprs: "a >= 1 and b > 1 and a < 2",
selectivity: 0.01817558299,
selectivity: 0.01783264746,
},
{
exprs: "a >= 1 and c > 1 and a < 2",
Expand All @@ -170,7 +170,7 @@ func (s *testSelectivitySuite) TestSelectivity(c *C) {
},
{
exprs: "b > 1",
selectivity: 0.98148148148,
selectivity: 0.96296296296,
},
{
exprs: "a > 1 and b < 2 and c > 3 and d < 4 and e > 5",
Expand Down Expand Up @@ -279,6 +279,24 @@ func (s *testSelectivitySuite) TestEstimationForUnknownValues(c *C) {
count, err = statsTbl.GetRowCountByColumnRanges(sc, colID, getRange(1, 30))
c.Assert(err, IsNil)
c.Assert(count, Equals, 0.0)

testKit.MustExec("drop table t")
testKit.MustExec("create table t(a int, b int, index idx(b))")
testKit.MustExec("insert into t values (1,1)")
testKit.MustExec("analyze table t")
table, err = s.dom.InfoSchema().TableByName(model.NewCIStr("test"), model.NewCIStr("t"))
c.Assert(err, IsNil)
statsTbl = h.GetTableStats(table.Meta())

colID = table.Meta().Columns[0].ID
count, err = statsTbl.GetRowCountByColumnRanges(sc, colID, getRange(2, 2))
c.Assert(err, IsNil)
c.Assert(count, Equals, 0.0)

idxID = table.Meta().Indices[0].ID
count, err = statsTbl.GetRowCountByIndexRanges(sc, idxID, getRange(2, 2))
c.Assert(err, IsNil)
c.Assert(count, Equals, 0.0)
}

func (s *testSelectivitySuite) TestPrimaryKeySelectivity(c *C) {
Expand Down
2 changes: 1 addition & 1 deletion statistics/table.go
Original file line number | Diff line number | Diff line change
Expand Up @@ -314,7 +314,7 @@ func (t *Table) ColumnEqualRowCount(sc *stmtctx.StatementContext, value types.Da
return float64(t.Count) / pseudoEqualRate, nil
}
hist := t.Columns[colID]
result, err := hist.equalRowCount(sc, value)
result, err := hist.equalRowCount(sc, value, t.ModifyCount)
result *= hist.getIncreaseFactor(t.Count)
return result, errors.Trace(err)
}
Expand Down

0 comments on commit f5dd339

Please sign in to comment.