From dc718cad89a329dade2fef662c00f3bf43a50d57 Mon Sep 17 00:00:00 2001
From: Haibin Xie <lambdax.tyler@gmail.com>
Date: Wed, 24 Oct 2018 16:20:22 +0800
Subject: [PATCH] stats: fix estimation for out-of-range point queries (#8015)

---
 plan/cbo_test.go               |  8 ++++----
 statistics/ddl_test.go         |  2 +-
 statistics/histogram.go        | 16 ++++++++--------
 statistics/selectivity_test.go | 22 ++++++++++++++++++++--
 statistics/table.go            |  2 +-
 5 files changed, 34 insertions(+), 16 deletions(-)

diff --git a/plan/cbo_test.go b/plan/cbo_test.go
index d57bc25a4bfcc..b23dbbac8fe97 100644
--- a/plan/cbo_test.go
+++ b/plan/cbo_test.go
@@ -607,16 +607,16 @@ func (s *testAnalyzeSuite) TestLimit(c *C) {
 	}
 	testKit.MustExec("analyze table t")
 	testKit.MustQuery("explain select * from t use index(idx) where a > 1 and b > 1 and c > 1 limit 1").Check(testkit.Rows(
-		"IndexScan_13 Selection_15  cop table:t, index:a, b, range:(1 +inf,+inf +inf], keep order:false 1.10",
-		"Selection_15  IndexScan_13 cop gt(test.t.b, 1) 1.00",
-		"TableScan_14 Selection_16  cop table:t, keep order:false 1.00",
+		"IndexScan_13 Selection_15  cop table:t, index:a, b, range:(1 +inf,+inf +inf], keep order:false 1.56",
+		"Selection_15  IndexScan_13 cop gt(test.t.b, 1) 1.25",
+		"TableScan_14 Selection_16  cop table:t, keep order:false 1.25",
 		"Selection_16 Limit_17 TableScan_14 cop gt(test.t.c, 1) 1.00",
 		"Limit_17  Selection_16 cop offset:0, count:1 1.00",
 		"IndexLookUp_18 Limit_9  root index:Selection_15, table:Limit_17 1.00",
 		"Limit_9  IndexLookUp_18 root offset:0, count:1 1.00",
 	))
 	testKit.MustQuery("explain select * from t where a > 1 and c > 1 limit 1").Check(testkit.Rows(
-		"TableScan_11 Selection_12  cop table:t, range:(1,+inf], keep order:false 1.11",
+		"TableScan_11 Selection_12  cop table:t, range:(1,+inf], keep order:false 1.25",
 		"Selection_12 Limit_15 TableScan_11 cop gt(test.t.c, 1) 1.00",
 		"Limit_15  Selection_12 cop offset:0, count:1 1.00",
 		"TableReader_16 Limit_8  root data:Limit_15 1.00",
diff --git a/statistics/ddl_test.go b/statistics/ddl_test.go
index 3e060e2cf8c1e..92317973de2a2 100644
--- a/statistics/ddl_test.go
+++ b/statistics/ddl_test.go
@@ -126,7 +126,7 @@ func (s *testStatsCacheSuite) TestDDLHistogram(c *C) {
 	c.Assert(count, Equals, float64(2))
 	count, err = statsTbl.ColumnEqualRowCount(sc, types.NewIntDatum(1), tableInfo.Columns[3].ID)
 	c.Assert(err, IsNil)
-	c.Assert(count, Equals, float64(2))
+	c.Assert(count, Equals, float64(0))
 
 	testKit.MustExec("alter table t add column c4 datetime NOT NULL default CURRENT_TIMESTAMP")
 	err = h.HandleDDLEvent(<-h.DDLEventCh())
diff --git a/statistics/histogram.go b/statistics/histogram.go
index 5b8831f06a39c..3074b4c10ce89 100644
--- a/statistics/histogram.go
+++ b/statistics/histogram.go
@@ -637,7 +637,7 @@ func (c *Column) String() string {
 	return c.Histogram.ToString(0)
 }
 
-func (c *Column) equalRowCount(sc *stmtctx.StatementContext, val types.Datum) (float64, error) {
+func (c *Column) equalRowCount(sc *stmtctx.StatementContext, val types.Datum, modifyCount int64) (float64, error) {
 	if val.IsNull() {
 		return float64(c.NullCount), nil
 	}
@@ -646,7 +646,7 @@ func (c *Column) equalRowCount(sc *stmtctx.StatementContext, val types.Datum) (f
 		return 0.0, nil
 	}
 	if c.NDV > 0 && c.outOfRange(val) {
-		return c.totalRowCount() / (float64(c.NDV)), nil
+		return float64(modifyCount) / float64(c.NDV), nil
 	}
 	if c.CMSketch != nil {
 		count, err := c.CMSketch.queryValue(sc, val)
@@ -667,7 +667,7 @@ func (c *Column) getColumnRowCount(sc *stmtctx.StatementContext, ranges []*range
 			// the point case.
 			if !rg.LowExclude && !rg.HighExclude {
 				var cnt float64
-				cnt, err = c.equalRowCount(sc, rg.LowVal[0])
+				cnt, err = c.equalRowCount(sc, rg.LowVal[0], modifyCount)
 				if err != nil {
 					return 0, errors.Trace(err)
 				}
@@ -681,14 +681,14 @@ func (c *Column) getColumnRowCount(sc *stmtctx.StatementContext, ranges []*range
 			cnt += float64(modifyCount) / outOfRangeBetweenRate
 		}
 		if rg.LowExclude {
-			lowCnt, err := c.equalRowCount(sc, rg.LowVal[0])
+			lowCnt, err := c.equalRowCount(sc, rg.LowVal[0], modifyCount)
 			if err != nil {
 				return 0, errors.Trace(err)
 			}
 			cnt -= lowCnt
 		}
 		if !rg.HighExclude {
-			highCnt, err := c.equalRowCount(sc, rg.HighVal[0])
+			highCnt, err := c.equalRowCount(sc, rg.HighVal[0], modifyCount)
 			if err != nil {
 				return 0, errors.Trace(err)
 			}
@@ -715,10 +715,10 @@ func (idx *Index) String() string {
 	return idx.Histogram.ToString(len(idx.Info.Columns))
 }
 
-func (idx *Index) equalRowCount(sc *stmtctx.StatementContext, b []byte) float64 {
+func (idx *Index) equalRowCount(sc *stmtctx.StatementContext, b []byte, modifyCount int64) float64 {
 	val := types.NewBytesDatum(b)
 	if idx.NDV > 0 && idx.outOfRange(val) {
-		return idx.totalRowCount() / (float64(idx.NDV))
+		return float64(modifyCount) / (float64(idx.NDV))
 	}
 	if idx.CMSketch != nil {
 		return float64(idx.CMSketch.queryBytes(b))
@@ -740,7 +740,7 @@ func (idx *Index) getRowCount(sc *stmtctx.StatementContext, indexRanges []*range
 		fullLen := len(indexRange.LowVal) == len(indexRange.HighVal) && len(indexRange.LowVal) == len(idx.Info.Columns)
 		if fullLen && bytes.Equal(lb, rb) {
 			if !indexRange.LowExclude && !indexRange.HighExclude {
-				totalCount += idx.equalRowCount(sc, lb)
+				totalCount += idx.equalRowCount(sc, lb, modifyCount)
 			}
 			continue
 		}
diff --git a/statistics/selectivity_test.go b/statistics/selectivity_test.go
index cd0dabf7247c0..a8607b09cbec6 100644
--- a/statistics/selectivity_test.go
+++ b/statistics/selectivity_test.go
@@ -154,7 +154,7 @@ func (s *testSelectivitySuite) TestSelectivity(c *C) {
 		},
 		{
 			exprs:       "a >= 1 and b > 1 and a < 2",
-			selectivity: 0.01817558299,
+			selectivity: 0.01783264746,
 		},
 		{
 			exprs:       "a >= 1 and c > 1 and a < 2",
@@ -170,7 +170,7 @@ func (s *testSelectivitySuite) TestSelectivity(c *C) {
 		},
 		{
 			exprs:       "b > 1",
-			selectivity: 0.98148148148,
+			selectivity: 0.96296296296,
 		},
 		{
 			exprs:       "a > 1 and b < 2 and c > 3 and d < 4 and e > 5",
@@ -279,6 +279,24 @@ func (s *testSelectivitySuite) TestEstimationForUnknownValues(c *C) {
 	count, err = statsTbl.GetRowCountByColumnRanges(sc, colID, getRange(1, 30))
 	c.Assert(err, IsNil)
 	c.Assert(count, Equals, 0.0)
+
+	testKit.MustExec("drop table t")
+	testKit.MustExec("create table t(a int, b int, index idx(b))")
+	testKit.MustExec("insert into t values (1,1)")
+	testKit.MustExec("analyze table t")
+	table, err = s.dom.InfoSchema().TableByName(model.NewCIStr("test"), model.NewCIStr("t"))
+	c.Assert(err, IsNil)
+	statsTbl = h.GetTableStats(table.Meta())
+
+	colID = table.Meta().Columns[0].ID
+	count, err = statsTbl.GetRowCountByColumnRanges(sc, colID, getRange(2, 2))
+	c.Assert(err, IsNil)
+	c.Assert(count, Equals, 0.0)
+
+	idxID = table.Meta().Indices[0].ID
+	count, err = statsTbl.GetRowCountByIndexRanges(sc, idxID, getRange(2, 2))
+	c.Assert(err, IsNil)
+	c.Assert(count, Equals, 0.0)
 }
 
 func (s *testSelectivitySuite) TestPrimaryKeySelectivity(c *C) {
diff --git a/statistics/table.go b/statistics/table.go
index 6e615c3501bbb..3d8e6df9c0436 100644
--- a/statistics/table.go
+++ b/statistics/table.go
@@ -314,7 +314,7 @@ func (t *Table) ColumnEqualRowCount(sc *stmtctx.StatementContext, value types.Da
 		return float64(t.Count) / pseudoEqualRate, nil
 	}
 	hist := t.Columns[colID]
-	result, err := hist.equalRowCount(sc, value)
+	result, err := hist.equalRowCount(sc, value, t.ModifyCount)
 	result *= hist.getIncreaseFactor(t.Count)
 	return result, errors.Trace(err)
 }