stats: fix row count estimation for null #6203

Merged 5 commits on Apr 4, 2018
4 changes: 1 addition & 3 deletions executor/analyze.go
@@ -27,7 +27,6 @@ import (
"github.com/pingcap/tidb/sessionctx/variable"
"github.com/pingcap/tidb/statistics"
"github.com/pingcap/tidb/tablecodec"
"github.com/pingcap/tidb/types"
"github.com/pingcap/tidb/util/chunk"
"github.com/pingcap/tidb/util/ranger"
tipb "github.com/pingcap/tipb/go-tipb"
@@ -176,9 +175,8 @@ type AnalyzeIndexExec struct {
}

func (e *AnalyzeIndexExec) open() error {
idxRange := &ranger.NewRange{LowVal: []types.Datum{types.MinNotNullDatum()}, HighVal: []types.Datum{types.MaxValueDatum()}}
var builder distsql.RequestBuilder
kvReq, err := builder.SetIndexRanges(e.ctx.GetSessionVars().StmtCtx, e.tblInfo.ID, e.idxInfo.ID, []*ranger.NewRange{idxRange}).
kvReq, err := builder.SetIndexRanges(e.ctx.GetSessionVars().StmtCtx, e.tblInfo.ID, e.idxInfo.ID, ranger.FullNewRange()).
SetAnalyzeRequest(e.analyzePB).
SetKeepOrder(true).
SetPriority(e.priority).
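
The range passed to the ANALYZE request is the substance of this change: the old lower bound of types.MinNotNullDatum() made the index scan skip entries whose key is NULL, so null rows never reached the statistics builder. A minimal sketch of what ranger.FullNewRange() is assumed to return (the real helper lives in util/ranger; the zero types.Datum encodes NULL):

// Sketch only, assuming the helper simply widens the scan to start at NULL.
func FullNewRange() []*NewRange {
	return []*NewRange{{
		LowVal:  []types.Datum{{}},                    // zero Datum is NULL, the smallest possible key
		HighVal: []types.Datum{types.MaxValueDatum()}, // unbounded above
	}}
}
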
41 changes: 41 additions & 0 deletions plan/cbo_test.go
@@ -541,6 +541,47 @@ func (s *testAnalyzeSuite) TestPreparedNullParam(c *C) {
cfg.PreparedPlanCache.Capacity = orgCapacity
}

func (s *testAnalyzeSuite) TestNullCount(c *C) {
defer testleak.AfterTest(c)()
store, dom, err := newStoreWithBootstrap()
c.Assert(err, IsNil)
testKit := testkit.NewTestKit(c, store)
defer func() {
dom.Close()
store.Close()
}()
testKit.MustExec("use test")
testKit.MustExec("drop table if exists t")
testKit.MustExec("create table t (a int, b int, index idx(a))")
testKit.MustExec("insert into t values (null, null), (null, null)")
testKit.MustExec("analyze table t")
testKit.MustQuery("explain select * from t where a is null").Check(testkit.Rows(
"TableScan_5 Selection_6 cop table:t, range:[-inf,+inf], keep order:false 2.00",
"Selection_6 TableScan_5 cop isnull(test.t.a) 2.00",
"TableReader_7 root data:Selection_6 2.00",
))
testKit.MustQuery("explain select * from t use index(idx) where a is null").Check(testkit.Rows(
"IndexScan_5 cop table:t, index:a, range:[<nil>,<nil>], keep order:false 2.00",
"TableScan_6 cop table:t, keep order:false 2.00",
"IndexLookUp_7 root index:IndexScan_5, table:TableScan_6 2.00",
))
h := dom.StatsHandle()
h.Clear()
h.Lease = 1
defer func() { h.Lease = 0 }()
c.Assert(h.Update(dom.InfoSchema()), IsNil)
testKit.MustQuery("explain select * from t where b = 1").Check(testkit.Rows(
"TableScan_5 Selection_6 cop table:t, range:[-inf,+inf], keep order:false 2.00",
"Selection_6 TableScan_5 cop eq(test.t.b, 1) 0.00",
"TableReader_7 root data:Selection_6 0.00",
))
testKit.MustQuery("explain select * from t where b < 1").Check(testkit.Rows(
"TableScan_5 Selection_6 cop table:t, range:[-inf,+inf], keep order:false 2.00",
"Selection_6 TableScan_5 cop lt(test.t.b, 1) 0.00",
"TableReader_7 root data:Selection_6 0.00",
))
}

func newStoreWithBootstrap() (kv.Storage, *domain.Domain, error) {
store, err := mockstore.NewMockTikvStore()
if err != nil {
2 changes: 1 addition & 1 deletion statistics/boostrap.go
@@ -111,7 +111,7 @@ func initStatsHistograms4Chunk(is infoschema.InfoSchema, tables statsCache, iter
continue
}
hist := NewHistogram(id, ndv, nullCount, version, &colInfo.FieldType, 0, totColSize)
table.Columns[hist.ID] = &Column{Histogram: *hist, Info: colInfo}
table.Columns[hist.ID] = &Column{Histogram: *hist, Info: colInfo, Count: nullCount}
}
}
}
2 changes: 1 addition & 1 deletion statistics/ddl_test.go
@@ -107,7 +107,7 @@ func (s *testStatsCacheSuite) TestDDLHistogram(c *C) {
statsTbl := do.StatsHandle().GetTableStats(tableInfo)
c.Assert(statsTbl.Pseudo, IsFalse)
sc := new(stmtctx.StatementContext)
c.Assert(statsTbl.ColumnIsInvalid(sc, tableInfo.Columns[2].ID), IsTrue)
c.Check(statsTbl.Columns[tableInfo.Columns[2].ID].NullCount, Equals, int64(2))
c.Check(statsTbl.Columns[tableInfo.Columns[2].ID].NDV, Equals, int64(0))

testKit.MustExec("alter table t add column c3 int NOT NULL")
19 changes: 15 additions & 4 deletions statistics/histogram.go
@@ -358,6 +358,10 @@ func (hg *Histogram) greaterAndEqRowCount(value types.Datum) float64 {

// lessRowCount estimates the row count where the column less than value.
func (hg *Histogram) lessRowCount(value types.Datum) float64 {
// all the values are null
if hg.Bounds == nil {
return 0
}
index, match := hg.Bounds.LowerBound(0, &value)
if index == hg.Bounds.NumRows() {
return hg.totalRowCount()
@@ -389,17 +393,17 @@ func (hg *Histogram) betweenRowCount(a, b types.Datum) float64 {
lessCountB := hg.lessRowCount(b)
// If lessCountA is not less than lessCountB, it may be that they fall to the same bucket and we cannot estimate
// the fraction, so we use `totalCount / NDV` to estimate the row count, but the result should not greater than lessCountB.
if lessCountA >= lessCountB {
if lessCountA >= lessCountB && hg.NDV > 0 {
return math.Min(lessCountB, hg.totalRowCount()/float64(hg.NDV))
}
return lessCountB - lessCountA
}

func (hg *Histogram) totalRowCount() float64 {
if hg.Len() == 0 {
return 0
return float64(hg.NullCount)
}
return float64(hg.Buckets[hg.Len()-1].Count)
return float64(hg.Buckets[hg.Len()-1].Count + hg.NullCount)
}

// mergeBuckets is used to merge every two neighbor buckets.
@@ -425,7 +429,7 @@ func (hg *Histogram) mergeBuckets(bucketIdx int) {

// getIncreaseFactor will return a factor of data increasing after the last analysis.
func (hg *Histogram) getIncreaseFactor(totalCount int64) float64 {
columnCount := int64(hg.totalRowCount()) + hg.NullCount
columnCount := int64(hg.totalRowCount())
if columnCount == 0 {
// avoid dividing by 0
return 1.0
@@ -615,10 +619,17 @@ func (c *Column) String() string {
}

func (c *Column) equalRowCount(sc *stmtctx.StatementContext, val types.Datum) (float64, error) {
if val.IsNull() {
return float64(c.NullCount), nil
}
if c.CMSketch != nil {
count, err := c.CMSketch.queryValue(sc, val)
return float64(count), errors.Trace(err)
}
// all the values are null
if c.Histogram.Bounds == nil {
return 0.0, nil
}
return c.Histogram.equalRowCount(val), nil
}

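
Taken together, the histogram changes make the estimates null-aware: the total row count now includes NullCount, `col is null` is answered directly from NullCount, and any non-NULL predicate on an all-NULL column estimates zero rows. A self-contained sketch of those rules (simplified types, not the TiDB ones), checked against the two-row all-NULL table used in TestNullCount:

package main

import "fmt"

// histogram is a toy model of statistics.Histogram for this PR's counting rules.
type histogram struct {
	nullCount int64
	buckets   []int64 // cumulative row counts, like Buckets[i].Count
}

// totalRowCount includes NULLs; with no buckets it is just the NULL count.
func (h histogram) totalRowCount() float64 {
	if len(h.buckets) == 0 {
		return float64(h.nullCount)
	}
	return float64(h.buckets[len(h.buckets)-1] + h.nullCount)
}

// equalRowCount answers `col = v` and `col is null`; histEstimate stands in for
// the real histogram or CM sketch lookup used when buckets exist.
func (h histogram) equalRowCount(isNull bool, histEstimate float64) float64 {
	if isNull {
		return float64(h.nullCount)
	}
	if len(h.buckets) == 0 { // every analyzed value was NULL
		return 0
	}
	return histEstimate
}

func main() {
	h := histogram{nullCount: 2} // table t: two (NULL, NULL) rows
	fmt.Println(h.totalRowCount())         // 2, matching the 2.00 scan estimates
	fmt.Println(h.equalRowCount(true, 0))  // 2, matching `a is null`
	fmt.Println(h.equalRowCount(false, 0)) // 0, matching `b = 1`
}
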
4 changes: 2 additions & 2 deletions statistics/table.go
@@ -131,7 +131,7 @@ func (h *Handle) columnStatsFromStorage(row types.Row, table *Table, tableInfo *
col = &Column{
Histogram: Histogram{ID: histID, NDV: distinct, NullCount: nullCount, tp: &colInfo.FieldType, LastUpdateVersion: histVer, TotColSize: totColSize},
Info: colInfo,
Count: count}
Count: count + nullCount}
break
}
if col == nil || col.LastUpdateVersion < histVer || loadAll {
@@ -252,7 +252,7 @@ func (t *Table) ColumnIsInvalid(sc *stmtctx.StatementContext, colID int64) bool
sc.SetHistogramsNotLoad()
histogramNeededColumns.insert(tableColumnID{tableID: t.TableID, columnID: colID})
}
return !ok || col.Len() == 0
return !ok || col.totalRowCount() == 0 || (col.NDV > 0 && col.Len() == 0)
}

// ColumnGreaterRowCount estimates the row count where the column greater than value.
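
The ColumnIsInvalid change is what lets the planner use these stats at all: a column whose values are all NULL has an empty histogram (Len() == 0) but a usable NullCount, so it should no longer be treated as invalid. A hedged restatement of the new condition with illustrative names (not the TiDB signature):

// Stats are unusable when they are missing, when the column truly has no rows,
// or when distinct non-NULL values exist but the histogram buckets are not loaded yet.
func columnStatsInvalid(found bool, totalRowCount float64, ndv int64, bucketLen int) bool {
	return !found || totalRowCount == 0 || (ndv > 0 && bucketLen == 0)
}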