From ced25893683b22498207d4f73fc61e006d320c20 Mon Sep 17 00:00:00 2001
From: Haibin Xie <lambdax.tyler@gmail.com>
Date: Sun, 5 May 2019 15:08:54 +0800
Subject: [PATCH 1/4] executor, stats: support incremental analyze with
 feedback

---
 executor/analyze.go            | 28 +++++++---------
 executor/analyze_test.go       | 36 +++++++++++++++++++++
 executor/builder.go            | 46 +++++++++++++++++++-------
 executor/executor_test.go      |  1 +
 session/bootstrap.go           | 10 ++++++
 session/session.go             |  2 +-
 statistics/cmsketch.go         | 22 +++++++++++++
 statistics/handle/bootstrap.go | 16 +++++----
 statistics/handle/handle.go    | 49 ++++++++++++++++++----------
 statistics/handle/update.go    |  2 ++
 statistics/histogram.go        | 59 +++++++++++++---------------------
 11 files changed, 182 insertions(+), 89 deletions(-)

diff --git a/executor/analyze.go b/executor/analyze.go
index 5c27ba9e17d39..d52a0afeafafe 100644
--- a/executor/analyze.go
+++ b/executor/analyze.go
@@ -1075,13 +1075,13 @@ func (e *AnalyzeTestFastExec) TestFastSample() error {
 
 type analyzeIndexIncrementalExec struct {
 	AnalyzeIndexExec
-	index *statistics.Index
+	oldHist *statistics.Histogram
+	oldCMS  *statistics.CMSketch
 }
 
 func analyzeIndexIncremental(idxExec *analyzeIndexIncrementalExec) analyzeResult {
-	idx := idxExec.index
-	highBound := idx.Histogram.GetUpper(idx.Len() - 1)
-	values, err := codec.Decode(highBound.GetBytes(), len(idxExec.idxInfo.Columns))
+	startPos := idxExec.oldHist.GetUpper(idxExec.oldHist.Len() - 1)
+	values, err := codec.DecodeRange(startPos.GetBytes(), len(idxExec.idxInfo.Columns))
 	if err != nil {
 		return analyzeResult{Err: err, job: idxExec.job}
 	}
@@ -1090,16 +1090,12 @@ func analyzeIndexIncremental(idxExec *analyzeIndexIncrementalExec) analyzeResult
 	if err != nil {
 		return analyzeResult{Err: err, job: idxExec.job}
 	}
-	oldHist, oldCMS, err := idx.RemoveUpperBound(idxExec.ctx.GetSessionVars().StmtCtx, values)
+	hist, err = statistics.MergeHistograms(idxExec.ctx.GetSessionVars().StmtCtx, idxExec.oldHist, hist, int(idxExec.maxNumBuckets))
 	if err != nil {
 		return analyzeResult{Err: err, job: idxExec.job}
 	}
-	hist, err = statistics.MergeHistograms(idxExec.ctx.GetSessionVars().StmtCtx, oldHist, hist, int(idxExec.maxNumBuckets))
-	if err != nil {
-		return analyzeResult{Err: err, job: idxExec.job}
-	}
-	if oldCMS != nil && cms != nil {
-		err = cms.MergeCMSketch(oldCMS)
+	if idxExec.oldCMS != nil && cms != nil {
+		err = cms.MergeCMSketch4IncrementalAnalyze(idxExec.oldCMS)
 		if err != nil {
 			return analyzeResult{Err: err, job: idxExec.job}
 		}
@@ -1120,26 +1116,24 @@ func analyzeIndexIncremental(idxExec *analyzeIndexIncrementalExec) analyzeResult
 
 type analyzePKIncrementalExec struct {
 	AnalyzeColumnsExec
-	pkStats *statistics.Column
+	oldHist *statistics.Histogram
 }
 
 func analyzePKIncremental(colExec *analyzePKIncrementalExec) analyzeResult {
-	pkStats := colExec.pkStats
-	high := pkStats.GetUpper(pkStats.Len() - 1)
 	var maxVal types.Datum
 	if mysql.HasUnsignedFlag(colExec.pkInfo.Flag) {
 		maxVal = types.NewUintDatum(math.MaxUint64)
 	} else {
 		maxVal = types.NewIntDatum(math.MaxInt64)
 	}
-	ran := ranger.Range{LowVal: []types.Datum{*high}, LowExclude: true, HighVal: []types.Datum{maxVal}}
+	startPos := *colExec.oldHist.GetUpper(colExec.oldHist.Len() - 1)
+	ran := ranger.Range{LowVal: []types.Datum{startPos}, LowExclude: true, HighVal: []types.Datum{maxVal}}
 	hists, _, err := colExec.buildStats([]*ranger.Range{&ran})
 	if err != nil {
 		return analyzeResult{Err: err, job: colExec.job}
 	}
 	hist := hists[0]
-	oldHist := pkStats.Histogram.Copy()
-	hist, err = statistics.MergeHistograms(colExec.ctx.GetSessionVars().StmtCtx, oldHist, hist, int(colExec.maxNumBuckets))
+	hist, err = statistics.MergeHistograms(colExec.ctx.GetSessionVars().StmtCtx, colExec.oldHist, hist, int(colExec.maxNumBuckets))
 	if err != nil {
 		return analyzeResult{Err: err, job: colExec.job}
 	}
diff --git a/executor/analyze_test.go b/executor/analyze_test.go
index c33880d4f4b90..e4d1225a098b4 100644
--- a/executor/analyze_test.go
+++ b/executor/analyze_test.go
@@ -24,9 +24,13 @@ import (
 	"github.com/pingcap/tidb/executor"
 	"github.com/pingcap/tidb/session"
 	"github.com/pingcap/tidb/sessionctx"
+	"github.com/pingcap/tidb/statistics"
+	"github.com/pingcap/tidb/statistics/handle"
 	"github.com/pingcap/tidb/store/mockstore"
 	"github.com/pingcap/tidb/store/mockstore/mocktikv"
 	"github.com/pingcap/tidb/table"
+	"github.com/pingcap/tidb/types"
+	"github.com/pingcap/tidb/util/codec"
 	"github.com/pingcap/tidb/util/testkit"
 )
 
@@ -303,4 +307,36 @@ func (s *testSuite1) TestAnalyzeIncremental(c *C) {
 	tk.MustExec("analyze incremental table t index")
 	// Result should not change.
 	tk.MustQuery("show stats_buckets").Check(testkit.Rows("test t  a 0 0 1 1 1 1", "test t  a 0 1 2 1 2 2", "test t  idx 1 0 1 1 1 1", "test t  idx 1 1 2 1 2 2"))
+
+	// Test analyze incremental with feedback.
+	tk.MustExec("insert into t values (3,3)")
+	oriProbability := statistics.FeedbackProbability.Load()
+	defer func() {
+		statistics.FeedbackProbability.Store(oriProbability)
+	}()
+	statistics.FeedbackProbability.Store(1)
+	is := s.dom.InfoSchema()
+	table, err := is.TableByName(model.NewCIStr("test"), model.NewCIStr("t"))
+	c.Assert(err, IsNil)
+	tblInfo := table.Meta()
+	tk.MustQuery("select * from t use index(idx) where b = 3")
+	tk.MustQuery("select * from t where a > 1")
+	h := s.dom.StatsHandle()
+	c.Assert(h.DumpStatsDeltaToKV(handle.DumpAll), IsNil)
+	c.Assert(h.DumpStatsFeedbackToKV(), IsNil)
+	c.Assert(h.HandleUpdateStats(is), IsNil)
+	h.Update(is)
+	tk.MustQuery("show stats_buckets").Check(testkit.Rows("test t  a 0 0 1 1 1 1", "test t  a 0 1 3 0 2 2147483647", "test t  idx 1 0 1 1 1 1", "test t  idx 1 1 2 1 2 2"))
+	tblStats := h.GetTableStats(tblInfo)
+	val, err := codec.EncodeKey(tk.Se.GetSessionVars().StmtCtx, nil, types.NewIntDatum(3))
+	c.Assert(err, IsNil)
+	c.Assert(tblStats.Indices[tblInfo.Indices[0].ID].CMSketch.QueryBytes(val), Equals, uint64(1))
+	c.Assert(statistics.IsAnalyzed(tblStats.Indices[tblInfo.Indices[0].ID].Flag), IsFalse)
+	c.Assert(statistics.IsAnalyzed(tblStats.Columns[tblInfo.Columns[0].ID].Flag), IsFalse)
+
+	tk.MustExec("analyze incremental table t index")
+	tk.MustQuery("show stats_buckets").Check(testkit.Rows("test t  a 0 0 1 1 1 1", "test t  a 0 1 2 1 2 2", "test t  a 0 2 3 1 3 3",
+		"test t  idx 1 0 1 1 1 1", "test t  idx 1 1 2 1 2 2", "test t  idx 1 2 3 1 3 3"))
+	tblStats = h.GetTableStats(tblInfo)
+	c.Assert(tblStats.Indices[tblInfo.Indices[0].ID].CMSketch.QueryBytes(val), Equals, uint64(1))
 }
diff --git a/executor/builder.go b/executor/builder.go
index 596b64c72b48c..10b819123fe2f 100644
--- a/executor/builder.go
+++ b/executor/builder.go
@@ -1385,18 +1385,28 @@ func (b *executorBuilder) buildAnalyzeIndexIncremental(task plannercore.AnalyzeI
 		return analyzeTask
 	}
 	idx, ok := statsTbl.Indices[task.IndexInfo.ID]
-	// TODO: If the index contains feedback, we may use other strategy.
-	if !ok || idx.Len() == 0 || idx.ContainsFeedback() {
+	if !ok || idx.Len() == 0 || idx.LastAnalyzePos.IsNull() {
 		return analyzeTask
 	}
-	exec := analyzeTask.idxExec
-	if idx.CMSketch != nil {
-		width, depth := idx.CMSketch.GetWidthAndDepth()
-		exec.analyzePB.IdxReq.CmsketchWidth = &width
-		exec.analyzePB.IdxReq.CmsketchDepth = &depth
+	var oldHist *statistics.Histogram
+	if statistics.IsAnalyzed(idx.Flag) {
+		exec := analyzeTask.idxExec
+		if idx.CMSketch != nil {
+			width, depth := idx.CMSketch.GetWidthAndDepth()
+			exec.analyzePB.IdxReq.CmsketchWidth = &width
+			exec.analyzePB.IdxReq.CmsketchDepth = &depth
+		}
+		oldHist = idx.Histogram.Copy()
+	} else {
+		_, bktID := idx.LessRowCountWithBktIdx(idx.LastAnalyzePos)
+		if bktID == 0 {
+			return analyzeTask
+		}
+		oldHist = idx.TruncateHistogram(bktID)
 	}
+	oldHist = idx.Histogram.RemoveUpperBound()
 	analyzeTask.taskType = idxIncrementalTask
-	analyzeTask.idxIncrementalExec = &analyzeIndexIncrementalExec{AnalyzeIndexExec: *analyzeTask.idxExec, index: idx}
+	analyzeTask.idxIncrementalExec = &analyzeIndexIncrementalExec{AnalyzeIndexExec: *analyzeTask.idxExec, oldHist: oldHist, oldCMS: idx.CMSketch}
 	analyzeTask.job = &statistics.AnalyzeJob{DBName: task.DBName, TableName: task.TableName, PartitionName: task.PartitionName, JobInfo: "analyze incremental index " + task.IndexInfo.Name.O}
 	return analyzeTask
 }
@@ -1445,13 +1455,27 @@ func (b *executorBuilder) buildAnalyzePKIncremental(task plannercore.AnalyzeColu
 		return analyzeTask
 	}
 	col, ok := statsTbl.Columns[task.PKInfo.ID]
-	// TODO: If the primary key contains feedback, we may use other strategy.
-	if !ok || col.Len() == 0 || col.ContainsFeedback() {
+	if !ok || col.Len() == 0 || col.LastAnalyzePos.IsNull() {
 		return analyzeTask
 	}
+	var oldHist *statistics.Histogram
+	if statistics.IsAnalyzed(col.Flag) {
+		oldHist = col.Histogram.Copy()
+	} else {
+		d, err := col.LastAnalyzePos.ConvertTo(b.ctx.GetSessionVars().StmtCtx, col.Tp)
+		if err != nil {
+			b.err = err
+			return nil
+		}
+		_, bktID := col.LessRowCountWithBktIdx(d)
+		if bktID == 0 {
+			return analyzeTask
+		}
+		oldHist = col.TruncateHistogram(bktID)
+	}
 	exec := analyzeTask.colExec
 	analyzeTask.taskType = pkIncrementalTask
-	analyzeTask.colIncrementalExec = &analyzePKIncrementalExec{AnalyzeColumnsExec: *exec, pkStats: col}
+	analyzeTask.colIncrementalExec = &analyzePKIncrementalExec{AnalyzeColumnsExec: *exec, oldHist: oldHist}
 	analyzeTask.job = &statistics.AnalyzeJob{DBName: task.DBName, TableName: task.TableName, PartitionName: task.PartitionName, JobInfo: "analyze incremental primary key"}
 	return analyzeTask
 }
diff --git a/executor/executor_test.go b/executor/executor_test.go
index 9f24b23c0206a..e9b34d9b6a908 100644
--- a/executor/executor_test.go
+++ b/executor/executor_test.go
@@ -2507,6 +2507,7 @@ func (s *testSuite1) SetUpSuite(c *C) {
 	c.Assert(err, IsNil)
 	s.dom, err = session.BootstrapSession(s.store)
 	c.Assert(err, IsNil)
+	s.dom.SetStatsUpdating(true)
 }
 
 func (s *testSuite1) TearDownSuite(c *C) {
diff --git a/session/bootstrap.go b/session/bootstrap.go
index 34a0503a051a7..94beedfc41ba4 100644
--- a/session/bootstrap.go
+++ b/session/bootstrap.go
@@ -171,6 +171,7 @@ const (
 		stats_ver bigint(64) NOT NULL DEFAULT 0,
 		flag bigint(64) NOT NULL DEFAULT 0,
 		correlation double NOT NULL DEFAULT 0,
+		last_analyze_pos blob DEFAULT NULL,
 		unique index tbl(table_id, is_index, hist_id)
 	);`
 
@@ -328,6 +329,7 @@ const (
 	version28 = 28
 	version29 = 29
 	version30 = 30
+	version31 = 31
 )
 
 func checkBootstrapped(s Session) (bool, error) {
@@ -507,6 +509,10 @@ func upgrade(s Session) {
 		upgradeToVer30(s)
 	}
 
+	if ver < version31 {
+		upgradeToVer31(s)
+	}
+
 	updateBootstrapVer(s)
 	_, err = s.Execute(context.Background(), "COMMIT")
 
@@ -799,6 +805,10 @@ func upgradeToVer30(s Session) {
 	mustExecute(s, CreateStatsTopNTable)
 }
 
+func upgradeToVer31(s Session) {
+	doReentrantDDL(s, "ALTER TABLE mysql.stats_histograms ADD COLUMN `last_analyze_pos` blob default null", infoschema.ErrColumnExists)
+}
+
 // updateBootstrapVer updates bootstrap version variable in mysql.TiDB table.
 func updateBootstrapVer(s Session) {
 	// Update bootstrap version.
diff --git a/session/session.go b/session/session.go
index 1f504b9dd4ddd..653340921600d 100644
--- a/session/session.go
+++ b/session/session.go
@@ -1558,7 +1558,7 @@ func createSessionWithDomain(store kv.Storage, dom *domain.Domain) (*session, er
 
 const (
 	notBootstrapped         = 0
-	currentBootstrapVersion = 30
+	currentBootstrapVersion = 31
 )
 
 func getStoreBootstrapVersion(store kv.Storage) int64 {
diff --git a/statistics/cmsketch.go b/statistics/cmsketch.go
index 6c58fe47abab9..80510ded742f2 100644
--- a/statistics/cmsketch.go
+++ b/statistics/cmsketch.go
@@ -298,6 +298,28 @@ func (c *CMSketch) MergeCMSketch(rc *CMSketch) error {
 	return nil
 }
 
+// MergeCMSketch4IncrementalAnalyze merges two CM Sketches for incremental analyze.
+// Since there is no value that appears partially in `c` and `rc`, we can just
+// merge them using `max` operations.
+func (c *CMSketch) MergeCMSketch4IncrementalAnalyze(rc *CMSketch) error {
+	if c.depth != rc.depth || c.width != rc.width {
+		return errors.New("Dimensions of Count-Min Sketch should be the same")
+	}
+	if c.topN != nil || rc.topN != nil {
+		return errors.New("CMSketch with Top-N does not support merge")
+	}
+	for i := range c.table {
+		for j := range c.table[i] {
+			c.table[i][j] = mathutil.MaxUint32(c.table[i][j], rc.table[i][j])
+		}
+	}
+	c.count = 0
+	for i := range c.table[0] {
+		c.count += uint64(c.table[0][i])
+	}
+	return nil
+}
+
 // CMSketchToProto converts CMSketch to its protobuf representation.
 func CMSketchToProto(c *CMSketch) *tipb.CMSketch {
 	protoSketch := &tipb.CMSketch{Rows: make([]*tipb.CMSketchRow, c.depth)}
diff --git a/statistics/handle/bootstrap.go b/statistics/handle/bootstrap.go
index a461bc6d6911d..3a9109454495b 100644
--- a/statistics/handle/bootstrap.go
+++ b/statistics/handle/bootstrap.go
@@ -109,7 +109,7 @@ func (h *Handle) initStatsHistograms4Chunk(is infoschema.InfoSchema, tables Stat
 				terror.Log(errors.Trace(err))
 			}
 			hist := statistics.NewHistogram(id, ndv, nullCount, version, types.NewFieldType(mysql.TypeBlob), chunk.InitialCapacity, 0)
-			table.Indices[hist.ID] = &statistics.Index{Histogram: *hist, CMSketch: cms, Info: idxInfo, StatsVer: row.GetInt64(8)}
+			table.Indices[hist.ID] = &statistics.Index{Histogram: *hist, CMSketch: cms, Info: idxInfo, StatsVer: row.GetInt64(8), Flag: row.GetInt64(10), LastAnalyzePos: row.GetDatum(11, types.NewFieldType(mysql.TypeBlob))}
 		} else {
 			var colInfo *model.ColumnInfo
 			for _, col := range tbl.Meta().Columns {
@@ -124,11 +124,13 @@ func (h *Handle) initStatsHistograms4Chunk(is infoschema.InfoSchema, tables Stat
 			hist := statistics.NewHistogram(id, ndv, nullCount, version, &colInfo.FieldType, 0, totColSize)
 			hist.Correlation = row.GetFloat64(9)
 			table.Columns[hist.ID] = &statistics.Column{
-				Histogram:  *hist,
-				PhysicalID: table.PhysicalID,
-				Info:       colInfo,
-				Count:      nullCount,
-				IsHandle:   tbl.Meta().PKIsHandle && mysql.HasPriKeyFlag(colInfo.Flag),
+				Histogram:      *hist,
+				PhysicalID:     table.PhysicalID,
+				Info:           colInfo,
+				Count:          nullCount,
+				IsHandle:       tbl.Meta().PKIsHandle && mysql.HasPriKeyFlag(colInfo.Flag),
+				Flag:           row.GetInt64(10),
+				LastAnalyzePos: row.GetDatum(11, types.NewFieldType(mysql.TypeBlob)),
 			}
 		}
 	}
@@ -137,7 +139,7 @@ func (h *Handle) initStatsHistograms4Chunk(is infoschema.InfoSchema, tables Stat
 func (h *Handle) initStatsHistograms(is infoschema.InfoSchema, tables StatsCache) error {
 	h.mu.Lock()
 	defer h.mu.Unlock()
-	sql := "select HIGH_PRIORITY table_id, is_index, hist_id, distinct_count, version, null_count, cm_sketch, tot_col_size, stats_ver, correlation from mysql.stats_histograms"
+	sql := "select HIGH_PRIORITY table_id, is_index, hist_id, distinct_count, version, null_count, cm_sketch, tot_col_size, stats_ver, correlation, flag, last_analyze_pos from mysql.stats_histograms"
 	rc, err := h.mu.ctx.(sqlexec.SQLExecutor).Execute(context.TODO(), sql)
 	if len(rc) > 0 {
 		defer terror.Call(rc[0].Close)
diff --git a/statistics/handle/handle.go b/statistics/handle/handle.go
index 8432fdfba2535..91b82c9cb63c5 100644
--- a/statistics/handle/handle.go
+++ b/statistics/handle/handle.go
@@ -336,7 +336,8 @@ func (h *Handle) indexStatsFromStorage(row chunk.Row, table *statistics.Table, t
 	nullCount := row.GetInt64(5)
 	idx := table.Indices[histID]
 	errorRate := statistics.ErrorRate{}
-	if statistics.IsAnalyzed(row.GetInt64(8)) {
+	flag := row.GetInt64(8)
+	if statistics.IsAnalyzed(flag) {
 		h.mu.Lock()
 		h.mu.rateMap.clear(table.PhysicalID, histID, true)
 		h.mu.Unlock()
@@ -356,7 +357,7 @@ func (h *Handle) indexStatsFromStorage(row chunk.Row, table *statistics.Table, t
 			if err != nil {
 				return errors.Trace(err)
 			}
-			idx = &statistics.Index{Histogram: *hg, CMSketch: cms, Info: idxInfo, ErrorRate: errorRate, StatsVer: row.GetInt64(7)}
+			idx = &statistics.Index{Histogram: *hg, CMSketch: cms, Info: idxInfo, ErrorRate: errorRate, StatsVer: row.GetInt64(7), Flag: flag, LastAnalyzePos: row.GetDatum(10, types.NewFieldType(mysql.TypeBlob))}
 		}
 		break
 	}
@@ -377,7 +378,8 @@ func (h *Handle) columnStatsFromStorage(row chunk.Row, table *statistics.Table,
 	correlation := row.GetFloat64(9)
 	col := table.Columns[histID]
 	errorRate := statistics.ErrorRate{}
-	if statistics.IsAnalyzed(row.GetInt64(8)) {
+	flag := row.GetInt64(8)
+	if statistics.IsAnalyzed(flag) {
 		h.mu.Lock()
 		h.mu.rateMap.clear(table.PhysicalID, histID, false)
 		h.mu.Unlock()
@@ -404,12 +406,14 @@ func (h *Handle) columnStatsFromStorage(row chunk.Row, table *statistics.Table,
 				return errors.Trace(err)
 			}
 			col = &statistics.Column{
-				PhysicalID: table.PhysicalID,
-				Histogram:  *statistics.NewHistogram(histID, distinct, nullCount, histVer, &colInfo.FieldType, 0, totColSize),
-				Info:       colInfo,
-				Count:      count + nullCount,
-				ErrorRate:  errorRate,
-				IsHandle:   tableInfo.PKIsHandle && mysql.HasPriKeyFlag(colInfo.Flag),
+				PhysicalID:     table.PhysicalID,
+				Histogram:      *statistics.NewHistogram(histID, distinct, nullCount, histVer, &colInfo.FieldType, 0, totColSize),
+				Info:           colInfo,
+				Count:          count + nullCount,
+				ErrorRate:      errorRate,
+				IsHandle:       tableInfo.PKIsHandle && mysql.HasPriKeyFlag(colInfo.Flag),
+				Flag:           flag,
+				LastAnalyzePos: row.GetDatum(10, types.NewFieldType(mysql.TypeBlob)),
 			}
 			col.Histogram.Correlation = correlation
 			break
@@ -424,13 +428,15 @@ func (h *Handle) columnStatsFromStorage(row chunk.Row, table *statistics.Table,
 				return errors.Trace(err)
 			}
 			col = &statistics.Column{
-				PhysicalID: table.PhysicalID,
-				Histogram:  *hg,
-				Info:       colInfo,
-				CMSketch:   cms,
-				Count:      int64(hg.TotalRowCount()),
-				ErrorRate:  errorRate,
-				IsHandle:   tableInfo.PKIsHandle && mysql.HasPriKeyFlag(colInfo.Flag),
+				PhysicalID:     table.PhysicalID,
+				Histogram:      *hg,
+				Info:           colInfo,
+				CMSketch:       cms,
+				Count:          int64(hg.TotalRowCount()),
+				ErrorRate:      errorRate,
+				IsHandle:       tableInfo.PKIsHandle && mysql.HasPriKeyFlag(colInfo.Flag),
+				Flag:           flag,
+				LastAnalyzePos: row.GetDatum(10, types.NewFieldType(mysql.TypeBlob)),
 			}
 			break
 		}
@@ -472,7 +478,7 @@ func (h *Handle) tableStatsFromStorage(tableInfo *model.TableInfo, physicalID in
 		table = table.Copy()
 	}
 	table.Pseudo = false
-	selSQL := fmt.Sprintf("select table_id, is_index, hist_id, distinct_count, version, null_count, tot_col_size, stats_ver, flag, correlation from mysql.stats_histograms where table_id = %d", physicalID)
+	selSQL := fmt.Sprintf("select table_id, is_index, hist_id, distinct_count, version, null_count, tot_col_size, stats_ver, flag, correlation, last_analyze_pos from mysql.stats_histograms where table_id = %d", physicalID)
 	rows, _, err := h.restrictedExec.ExecRestrictedSQL(nil, selSQL)
 	if err != nil {
 		return nil, errors.Trace(err)
@@ -558,6 +564,7 @@ func (h *Handle) SaveStatsToStorage(tableID int64, count int64, isIndex int, hg
 		return
 	}
 	sc := h.mu.ctx.GetSessionVars().StmtCtx
+	var lastAnalyzePos []byte
 	for i := range hg.Buckets {
 		count := hg.Buckets[i].Count
 		if i > 0 {
@@ -568,6 +575,7 @@ func (h *Handle) SaveStatsToStorage(tableID int64, count int64, isIndex int, hg
 		if err != nil {
 			return
 		}
+		lastAnalyzePos = upperBound.GetBytes()
 		var lowerBound types.Datum
 		lowerBound, err = hg.GetLower(i).ConvertTo(sc, types.NewFieldType(mysql.TypeBlob))
 		if err != nil {
@@ -579,6 +587,13 @@ func (h *Handle) SaveStatsToStorage(tableID int64, count int64, isIndex int, hg
 			return
 		}
 	}
+	if isAnalyzed == 1 && len(lastAnalyzePos) > 0 {
+		sql = fmt.Sprintf("update mysql.stats_histograms set last_analyze_pos = X'%X' where table_id = %d and is_index = %d and hist_id = %d", lastAnalyzePos, tableID, isIndex, hg.ID)
+		_, err = exec.Execute(ctx, sql)
+		if err != nil {
+			return
+		}
+	}
 	return
 }
 
diff --git a/statistics/handle/update.go b/statistics/handle/update.go
index 24ad2fe7e1c85..3d10bd036dc31 100644
--- a/statistics/handle/update.go
+++ b/statistics/handle/update.go
@@ -436,6 +436,7 @@ func (h *Handle) UpdateStatsByLocalFeedback(is infoschema.InfoSchema) {
 			newIdx.CMSketch = statistics.UpdateCMSketch(idx.CMSketch, eqFB)
 			newIdx.Histogram = *statistics.UpdateHistogram(&idx.Histogram, &statistics.QueryFeedback{Feedback: ranFB})
 			newIdx.Histogram.PreCalculateScalar()
+			newIdx.Flag = 0
 			newTblStats.Indices[fb.Hist.ID] = &newIdx
 		} else {
 			col, ok := tblStats.Columns[fb.Hist.ID]
@@ -448,6 +449,7 @@ func (h *Handle) UpdateStatsByLocalFeedback(is infoschema.InfoSchema) {
 			newFB := &statistics.QueryFeedback{Feedback: ranFB}
 			newFB = newFB.DecodeIntValues()
 			newCol.Histogram = *statistics.UpdateHistogram(&col.Histogram, newFB)
+			newCol.Flag = 0
 			newTblStats.Columns[fb.Hist.ID] = &newCol
 		}
 		h.UpdateTableStats([]*statistics.Table{newTblStats}, nil)
diff --git a/statistics/histogram.go b/statistics/histogram.go
index 31b70eabecd79..dc112894c916d 100644
--- a/statistics/histogram.go
+++ b/statistics/histogram.go
@@ -34,7 +34,6 @@ import (
 	"github.com/pingcap/tidb/util/logutil"
 	"github.com/pingcap/tidb/util/ranger"
 	"github.com/pingcap/tipb/go-tipb"
-	"github.com/spaolacci/murmur3"
 	"go.uber.org/zap"
 )
 
@@ -566,17 +565,6 @@ func (hg *Histogram) outOfRange(val types.Datum) bool {
 		chunk.Compare(hg.Bounds.GetRow(hg.Bounds.NumRows()-1), 0, &val) < 0
 }
 
-// ContainsFeedback checks if the histogram contains feedback updates.
-// We can test it from the `repeat` field because only feedback will update it to 0.
-func (hg *Histogram) ContainsFeedback() bool {
-	for _, bkt := range hg.Buckets {
-		if bkt.Repeat == 0 {
-			return true
-		}
-	}
-	return false
-}
-
 // Copy deep copies the histogram.
 func (hg *Histogram) Copy() *Histogram {
 	newHist := *hg
@@ -588,6 +576,23 @@ func (hg *Histogram) Copy() *Histogram {
 	return &newHist
 }
 
+// RemoveUpperBound removes the upper bound from the histogram in place and returns it.
+// It is used when merging stats for incremental analyze.
+func (hg *Histogram) RemoveUpperBound() *Histogram {
+	hg.Buckets[hg.Len()-1].Count -= hg.Buckets[hg.Len()-1].Repeat
+	hg.Buckets[hg.Len()-1].Repeat = 0
+	return hg
+}
+
+// TruncateHistogram truncates the histogram to `numBkt` buckets.
+func (hg *Histogram) TruncateHistogram(numBkt int) *Histogram {
+	hist := hg.Copy()
+	hist.Buckets = hist.Buckets[:numBkt]
+	hist.Bounds.TruncateTo(numBkt * 2)
+	hist.NDV = int64(float64(hg.NDV) * (hist.TotalRowCount() / hg.TotalRowCount()))
+	return hist
+}
+
 // ErrorRate is the error rate of estimate row count by bucket and cm sketch.
 type ErrorRate struct {
 	ErrorTotal float64
@@ -629,6 +634,8 @@ type Column struct {
 	Info       *model.ColumnInfo
 	IsHandle   bool
 	ErrorRate
+	Flag           int64
+	LastAnalyzePos types.Datum
 }
 
 func (c *Column) String() string {
@@ -730,8 +737,10 @@ type Index struct {
 	Histogram
 	*CMSketch
 	ErrorRate
-	StatsVer int64 // StatsVer is the version of the current stats, used to maintain compatibility
-	Info     *model.IndexInfo
+	StatsVer       int64 // StatsVer is the version of the current stats, used to maintain compatibility
+	Info           *model.IndexInfo
+	Flag           int64
+	LastAnalyzePos types.Datum
 }
 
 func (idx *Index) String() string {
@@ -990,28 +999,6 @@ func (idx *Index) outOfRange(val types.Datum) bool {
 	return !withInLowBoundOrPrefixMatch || !withInHighBound
 }
 
-// RemoveUpperBound removes the upper bound the index stats.
-// It is used when merge stats for incremental analyze.
-func (idx *Index) RemoveUpperBound(sc *stmtctx.StatementContext, values []types.Datum) (*Histogram, *CMSketch, error) {
-	hist, cms := idx.Histogram.Copy(), idx.CMSketch.Copy()
-	hist.Buckets[hist.Len()-1].Count -= hist.Buckets[hist.Len()-1].Repeat
-	hist.Buckets[hist.Len()-1].Repeat = 0
-	if cms == nil {
-		return hist, nil, nil
-	}
-	var data []byte
-	var err error
-	for _, val := range values {
-		data, err = codec.EncodeKey(sc, data, val)
-		if err != nil {
-			return nil, nil, err
-		}
-		h1, h2 := murmur3.Sum128(data)
-		cms.setValue(h1, h2, 0)
-	}
-	return hist, cms, nil
-}
-
 // matchPrefix checks whether ad is the prefix of value
 func matchPrefix(row chunk.Row, colIdx int, ad *types.Datum) bool {
 	switch ad.Kind() {

From 5357e8d5397436d261e151b1822dcf062699e01e Mon Sep 17 00:00:00 2001
From: Haibin Xie <lambdax.tyler@gmail.com>
Date: Sun, 5 May 2019 19:00:56 +0800
Subject: [PATCH 2/4] fix ci

---
 executor/builder.go | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/executor/builder.go b/executor/builder.go
index 10b819123fe2f..c7ceb8787d7e5 100644
--- a/executor/builder.go
+++ b/executor/builder.go
@@ -1404,7 +1404,7 @@ func (b *executorBuilder) buildAnalyzeIndexIncremental(task plannercore.AnalyzeI
 		}
 		oldHist = idx.TruncateHistogram(bktID)
 	}
-	oldHist = idx.Histogram.RemoveUpperBound()
+	oldHist = oldHist.RemoveUpperBound()
 	analyzeTask.taskType = idxIncrementalTask
 	analyzeTask.idxIncrementalExec = &analyzeIndexIncrementalExec{AnalyzeIndexExec: *analyzeTask.idxExec, oldHist: oldHist, oldCMS: idx.CMSketch}
 	analyzeTask.job = &statistics.AnalyzeJob{DBName: task.DBName, TableName: task.TableName, PartitionName: task.PartitionName, JobInfo: "analyze incremental index " + task.IndexInfo.Name.O}

From 517c0ea209131fa69360b4845f9b880c8ddf464f Mon Sep 17 00:00:00 2001
From: Haibin Xie <lambdax.tyler@gmail.com>
Date: Tue, 7 May 2019 19:42:18 +0800
Subject: [PATCH 3/4] address comments

---
 statistics/cmsketch.go | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/statistics/cmsketch.go b/statistics/cmsketch.go
index 55687064e03ae..747337c1d50ee 100644
--- a/statistics/cmsketch.go
+++ b/statistics/cmsketch.go
@@ -304,7 +304,7 @@ func (c *CMSketch) MergeCMSketch(rc *CMSketch) error {
 //   (1): For values that only appears in `c, using `max` to merge them affects the `min` query result less than using `sum`;
 //   (2): For values that only appears in `rc`, it is the same as condition (1);
 //   (3): For values that appears both in `c` and `rc`, if they do not appear partially in `c` and `rc`, for example,
-//        if `v` appears 5 times in the table, it can appears 3 times in `c` and 5 times in `rc`, then `max` also gives the correct answer.
+//        if `v` appears 5 times in the table, it can appear 5 times in `c` and 3 times in `rc`, then `max` also gives the correct answer.
 // So in fact, if we can know the number of appearances of each value in the first place, it is better to use `max` to construct the CM sketch rather than `sum`.
 func (c *CMSketch) MergeCMSketch4IncrementalAnalyze(rc *CMSketch) error {
 	if c.depth != rc.depth || c.width != rc.width {

From e8dc21e244aa70fe5bfd1916e98f453ce2b000e4 Mon Sep 17 00:00:00 2001
From: Haibin Xie <lambdax.tyler@gmail.com>
Date: Wed, 8 May 2019 11:58:13 +0800
Subject: [PATCH 4/4] address comments

---
 executor/analyze_test.go | 2 +-
 statistics/histogram.go  | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/executor/analyze_test.go b/executor/analyze_test.go
index e4d1225a098b4..d2e579286dd3d 100644
--- a/executor/analyze_test.go
+++ b/executor/analyze_test.go
@@ -325,7 +325,7 @@ func (s *testSuite1) TestAnalyzeIncremental(c *C) {
 	c.Assert(h.DumpStatsDeltaToKV(handle.DumpAll), IsNil)
 	c.Assert(h.DumpStatsFeedbackToKV(), IsNil)
 	c.Assert(h.HandleUpdateStats(is), IsNil)
-	h.Update(is)
+	c.Assert(h.Update(is), IsNil)
 	tk.MustQuery("show stats_buckets").Check(testkit.Rows("test t  a 0 0 1 1 1 1", "test t  a 0 1 3 0 2 2147483647", "test t  idx 1 0 1 1 1 1", "test t  idx 1 1 2 1 2 2"))
 	tblStats := h.GetTableStats(tblInfo)
 	val, err := codec.EncodeKey(tk.Se.GetSessionVars().StmtCtx, nil, types.NewIntDatum(3))
diff --git a/statistics/histogram.go b/statistics/histogram.go
index ca5846f2f88af..951070a45fe20 100644
--- a/statistics/histogram.go
+++ b/statistics/histogram.go
@@ -196,7 +196,7 @@ const (
 	Version1        = 1
 )
 
-// AnalyzeFlag is one for column flag. We can use IsAnalyzed to check whether this column is analyzed or not.
+// AnalyzeFlag is set when the statistics come from analyze and have not been modified by feedback.
 const AnalyzeFlag = 1
 
 // IsAnalyzed checks whether this flag contains AnalyzeFlag.
@@ -204,7 +204,7 @@ func IsAnalyzed(flag int64) bool {
 	return (flag & AnalyzeFlag) > 0
 }
 
-// ResetAnalyzeFlag resets the AnalyzeFlag.
+// ResetAnalyzeFlag resets the AnalyzeFlag because the statistics have been modified by feedback.
 func ResetAnalyzeFlag(flag int64) int64 {
 	return flag &^ AnalyzeFlag
 }