From dc718cad89a329dade2fef662c00f3bf43a50d57 Mon Sep 17 00:00:00 2001 From: Haibin Xie Date: Wed, 24 Oct 2018 16:20:22 +0800 Subject: [PATCH] stats: fix estimation for out of range point queries (#8015) --- plan/cbo_test.go | 8 ++++---- statistics/ddl_test.go | 2 +- statistics/histogram.go | 16 ++++++++-------- statistics/selectivity_test.go | 22 ++++++++++++++++++++-- statistics/table.go | 2 +- 5 files changed, 34 insertions(+), 16 deletions(-) diff --git a/plan/cbo_test.go b/plan/cbo_test.go index d57bc25a4bfcc..b23dbbac8fe97 100644 --- a/plan/cbo_test.go +++ b/plan/cbo_test.go @@ -607,16 +607,16 @@ func (s *testAnalyzeSuite) TestLimit(c *C) { } testKit.MustExec("analyze table t") testKit.MustQuery("explain select * from t use index(idx) where a > 1 and b > 1 and c > 1 limit 1").Check(testkit.Rows( - "IndexScan_13 Selection_15 cop table:t, index:a, b, range:(1 +inf,+inf +inf], keep order:false 1.10", - "Selection_15 IndexScan_13 cop gt(test.t.b, 1) 1.00", - "TableScan_14 Selection_16 cop table:t, keep order:false 1.00", + "IndexScan_13 Selection_15 cop table:t, index:a, b, range:(1 +inf,+inf +inf], keep order:false 1.56", + "Selection_15 IndexScan_13 cop gt(test.t.b, 1) 1.25", + "TableScan_14 Selection_16 cop table:t, keep order:false 1.25", "Selection_16 Limit_17 TableScan_14 cop gt(test.t.c, 1) 1.00", "Limit_17 Selection_16 cop offset:0, count:1 1.00", "IndexLookUp_18 Limit_9 root index:Selection_15, table:Limit_17 1.00", "Limit_9 IndexLookUp_18 root offset:0, count:1 1.00", )) testKit.MustQuery("explain select * from t where a > 1 and c > 1 limit 1").Check(testkit.Rows( - "TableScan_11 Selection_12 cop table:t, range:(1,+inf], keep order:false 1.11", + "TableScan_11 Selection_12 cop table:t, range:(1,+inf], keep order:false 1.25", "Selection_12 Limit_15 TableScan_11 cop gt(test.t.c, 1) 1.00", "Limit_15 Selection_12 cop offset:0, count:1 1.00", "TableReader_16 Limit_8 root data:Limit_15 1.00", diff --git a/statistics/ddl_test.go b/statistics/ddl_test.go index 3e060e2cf8c1e..92317973de2a2 100644 --- a/statistics/ddl_test.go +++ b/statistics/ddl_test.go @@ -126,7 +126,7 @@ func (s *testStatsCacheSuite) TestDDLHistogram(c *C) { c.Assert(count, Equals, float64(2)) count, err = statsTbl.ColumnEqualRowCount(sc, types.NewIntDatum(1), tableInfo.Columns[3].ID) c.Assert(err, IsNil) - c.Assert(count, Equals, float64(2)) + c.Assert(count, Equals, float64(0)) testKit.MustExec("alter table t add column c4 datetime NOT NULL default CURRENT_TIMESTAMP") err = h.HandleDDLEvent(<-h.DDLEventCh()) diff --git a/statistics/histogram.go b/statistics/histogram.go index 5b8831f06a39c..3074b4c10ce89 100644 --- a/statistics/histogram.go +++ b/statistics/histogram.go @@ -637,7 +637,7 @@ func (c *Column) String() string { return c.Histogram.ToString(0) } -func (c *Column) equalRowCount(sc *stmtctx.StatementContext, val types.Datum) (float64, error) { +func (c *Column) equalRowCount(sc *stmtctx.StatementContext, val types.Datum, modifyCount int64) (float64, error) { if val.IsNull() { return float64(c.NullCount), nil } @@ -646,7 +646,7 @@ func (c *Column) equalRowCount(sc *stmtctx.StatementContext, val types.Datum) (f return 0.0, nil } if c.NDV > 0 && c.outOfRange(val) { - return c.totalRowCount() / (float64(c.NDV)), nil + return float64(modifyCount) / float64(c.NDV), nil } if c.CMSketch != nil { count, err := c.CMSketch.queryValue(sc, val) @@ -667,7 +667,7 @@ func (c *Column) getColumnRowCount(sc *stmtctx.StatementContext, ranges []*range // the point case. if !rg.LowExclude && !rg.HighExclude { var cnt float64 - cnt, err = c.equalRowCount(sc, rg.LowVal[0]) + cnt, err = c.equalRowCount(sc, rg.LowVal[0], modifyCount) if err != nil { return 0, errors.Trace(err) } @@ -681,14 +681,14 @@ func (c *Column) getColumnRowCount(sc *stmtctx.StatementContext, ranges []*range cnt += float64(modifyCount) / outOfRangeBetweenRate } if rg.LowExclude { - lowCnt, err := c.equalRowCount(sc, rg.LowVal[0]) + lowCnt, err := c.equalRowCount(sc, rg.LowVal[0], modifyCount) if err != nil { return 0, errors.Trace(err) } cnt -= lowCnt } if !rg.HighExclude { - highCnt, err := c.equalRowCount(sc, rg.HighVal[0]) + highCnt, err := c.equalRowCount(sc, rg.HighVal[0], modifyCount) if err != nil { return 0, errors.Trace(err) } @@ -715,10 +715,10 @@ func (idx *Index) String() string { return idx.Histogram.ToString(len(idx.Info.Columns)) } -func (idx *Index) equalRowCount(sc *stmtctx.StatementContext, b []byte) float64 { +func (idx *Index) equalRowCount(sc *stmtctx.StatementContext, b []byte, modifyCount int64) float64 { val := types.NewBytesDatum(b) if idx.NDV > 0 && idx.outOfRange(val) { - return idx.totalRowCount() / (float64(idx.NDV)) + return float64(modifyCount) / (float64(idx.NDV)) } if idx.CMSketch != nil { return float64(idx.CMSketch.queryBytes(b)) @@ -740,7 +740,7 @@ func (idx *Index) getRowCount(sc *stmtctx.StatementContext, indexRanges []*range fullLen := len(indexRange.LowVal) == len(indexRange.HighVal) && len(indexRange.LowVal) == len(idx.Info.Columns) if fullLen && bytes.Equal(lb, rb) { if !indexRange.LowExclude && !indexRange.HighExclude { - totalCount += idx.equalRowCount(sc, lb) + totalCount += idx.equalRowCount(sc, lb, modifyCount) } continue } diff --git a/statistics/selectivity_test.go b/statistics/selectivity_test.go index cd0dabf7247c0..a8607b09cbec6 100644 --- a/statistics/selectivity_test.go +++ b/statistics/selectivity_test.go @@ -154,7 +154,7 @@ func (s *testSelectivitySuite) TestSelectivity(c *C) { }, { exprs: "a >= 1 and b > 1 and a < 2", - selectivity: 0.01817558299, + selectivity: 0.01783264746, }, { exprs: "a >= 1 and c > 1 and a < 2", @@ -170,7 +170,7 @@ func (s *testSelectivitySuite) TestSelectivity(c *C) { }, { exprs: "b > 1", - selectivity: 0.98148148148, + selectivity: 0.96296296296, }, { exprs: "a > 1 and b < 2 and c > 3 and d < 4 and e > 5", @@ -279,6 +279,24 @@ func (s *testSelectivitySuite) TestEstimationForUnknownValues(c *C) { count, err = statsTbl.GetRowCountByColumnRanges(sc, colID, getRange(1, 30)) c.Assert(err, IsNil) c.Assert(count, Equals, 0.0) + + testKit.MustExec("drop table t") + testKit.MustExec("create table t(a int, b int, index idx(b))") + testKit.MustExec("insert into t values (1,1)") + testKit.MustExec("analyze table t") + table, err = s.dom.InfoSchema().TableByName(model.NewCIStr("test"), model.NewCIStr("t")) + c.Assert(err, IsNil) + statsTbl = h.GetTableStats(table.Meta()) + + colID = table.Meta().Columns[0].ID + count, err = statsTbl.GetRowCountByColumnRanges(sc, colID, getRange(2, 2)) + c.Assert(err, IsNil) + c.Assert(count, Equals, 0.0) + + idxID = table.Meta().Indices[0].ID + count, err = statsTbl.GetRowCountByIndexRanges(sc, idxID, getRange(2, 2)) + c.Assert(err, IsNil) + c.Assert(count, Equals, 0.0) } func (s *testSelectivitySuite) TestPrimaryKeySelectivity(c *C) { diff --git a/statistics/table.go b/statistics/table.go index 6e615c3501bbb..3d8e6df9c0436 100644 --- a/statistics/table.go +++ b/statistics/table.go @@ -314,7 +314,7 @@ func (t *Table) ColumnEqualRowCount(sc *stmtctx.StatementContext, value types.Da return float64(t.Count) / pseudoEqualRate, nil } hist := t.Columns[colID] - result, err := hist.equalRowCount(sc, value) + result, err := hist.equalRowCount(sc, value, t.ModifyCount) result *= hist.getIncreaseFactor(t.Count) return result, errors.Trace(err) }