From 50426a99b20a4995ae440c870a0d90c83030100d Mon Sep 17 00:00:00 2001
From: Yiding Cui <winoros@gmail.com>
Date: Sat, 28 Apr 2018 15:58:31 +0800
Subject: [PATCH] plan: calculate table access paths in
 `DataSource.deriveStats` (#6346)

---
 plan/build_key_info.go        |   7 +-
 plan/gen_physical_plans.go    |  17 +++-
 plan/logical_plan_builder.go  |  14 ++--
 plan/logical_plans.go         |  87 ++++++++++++++++++--
 plan/optimizer.go             |   5 +-
 plan/physical_plan_builder.go | 145 ++++++++++++----------------------
 plan/plan.go                  |   2 +-
 plan/planbuilder.go           |  65 ++++++++-------
 plan/property_cols_prune.go   |  36 +++------
 plan/stats.go                 | 137 ++++++++++++++++++++++----------
 10 files changed, 303 insertions(+), 212 deletions(-)

diff --git a/plan/build_key_info.go b/plan/build_key_info.go
index 639f2230df5c0..24cf42072bc7a 100644
--- a/plan/build_key_info.go
+++ b/plan/build_key_info.go
@@ -174,8 +174,11 @@ func (p *LogicalJoin) buildKeyInfo() {
 func (ds *DataSource) buildKeyInfo() {
 	ds.schema.Keys = nil
 	ds.baseLogicalPlan.buildKeyInfo()
-	indices := ds.availableIndices.indices
-	for _, idx := range indices {
+	for _, path := range ds.possibleAccessPaths {
+		if path.isTablePath {
+			continue
+		}
+		idx := path.index
 		if !idx.Unique {
 			continue
 		}
diff --git a/plan/gen_physical_plans.go b/plan/gen_physical_plans.go
index c5b9691042cd6..1a665e56d8593 100644
--- a/plan/gen_physical_plans.go
+++ b/plan/gen_physical_plans.go
@@ -256,9 +256,14 @@ func (p *LogicalJoin) getIndexJoinByOuterIdx(prop *requiredProp, outerIdx int) [
 	if !ok {
 		return nil
 	}
-	indices := x.availableIndices.indices
-	includeTableScan := x.availableIndices.includeTableScan
-	if includeTableScan && len(innerJoinKeys) == 1 {
+	var tblPath *accessPath
+	for _, path := range x.possibleAccessPaths {
+		if path.isTablePath {
+			tblPath = path
+			break
+		}
+	}
+	if tblPath != nil && len(innerJoinKeys) == 1 {
 		pkCol := x.getPKIsHandleCol()
 		if pkCol != nil && innerJoinKeys[0].Equal(nil, pkCol) {
 			innerPlan := x.forceToTableScan(pkCol)
@@ -272,7 +277,11 @@ func (p *LogicalJoin) getIndexJoinByOuterIdx(prop *requiredProp, outerIdx int) [
 		remainedOfBest []expression.Expression
 		keyOff2IdxOff  []int
 	)
-	for _, indexInfo := range indices {
+	for _, path := range x.possibleAccessPaths {
+		if path.isTablePath {
+			continue
+		}
+		indexInfo := path.index
 		ranges, remained, tmpKeyOff2IdxOff := p.buildRangeForIndexJoin(indexInfo, x, innerJoinKeys)
 		// We choose the index by the number of used columns of the range, the much the better.
 		// Notice that there may be the cases like `t1.a=t2.a and b > 2 and b < 1`. So ranges can be nil though the conditions are valid.
diff --git a/plan/logical_plan_builder.go b/plan/logical_plan_builder.go
index 3d4be5dc6c067..d1f9986cc925c 100644
--- a/plan/logical_plan_builder.go
+++ b/plan/logical_plan_builder.go
@@ -1671,7 +1671,7 @@ func (b *planBuilder) buildDataSource(tn *ast.TableName) LogicalPlan {
 	tableInfo := tbl.Meta()
 	b.visitInfo = appendVisitInfo(b.visitInfo, mysql.SelectPriv, dbName.L, tableInfo.Name.L, "")
 
-	availableIdxes, err := getAvailableIndices(tn.IndexHints, tableInfo)
+	possiblePaths, err := getPossibleAccessPaths(tn.IndexHints, tableInfo)
 	if err != nil {
 		b.err = errors.Trace(err)
 		return nil
@@ -1685,12 +1685,12 @@ func (b *planBuilder) buildDataSource(tn *ast.TableName) LogicalPlan {
 	}
 
 	ds := DataSource{
-		DBName:           dbName,
-		tableInfo:        tableInfo,
-		statisticTable:   b.getStatsTable(tableInfo),
-		indexHints:       tn.IndexHints,
-		availableIndices: availableIdxes,
-		Columns:          make([]*model.ColumnInfo, 0, len(columns)),
+		DBName:              dbName,
+		tableInfo:           tableInfo,
+		statisticTable:      b.getStatsTable(tableInfo),
+		indexHints:          tn.IndexHints,
+		possibleAccessPaths: possiblePaths,
+		Columns:             make([]*model.ColumnInfo, 0, len(columns)),
 	}.init(b.ctx)
 
 	var handleCol *expression.Column
diff --git a/plan/logical_plans.go b/plan/logical_plans.go
index fe484a304a0f5..53fd2b4e3e9bb 100644
--- a/plan/logical_plans.go
+++ b/plan/logical_plans.go
@@ -14,6 +14,9 @@
 package plan
 
 import (
+	"math"
+
+	"github.com/juju/errors"
 	"github.com/pingcap/tidb/ast"
 	"github.com/pingcap/tidb/expression"
 	"github.com/pingcap/tidb/expression/aggregation"
@@ -21,6 +24,8 @@ import (
 	"github.com/pingcap/tidb/mysql"
 	"github.com/pingcap/tidb/statistics"
 	"github.com/pingcap/tidb/types"
+	"github.com/pingcap/tidb/util/ranger"
+	log "github.com/sirupsen/logrus"
 )
 
 var (
@@ -299,13 +304,85 @@ type DataSource struct {
 
 	statisticTable *statistics.Table
 
-	// availableIndices is used for storing result of availableIndices function.
-	availableIndices *availableIndices
+	// possibleAccessPaths stores all the possible access paths for the physical plan, including the table scan.
+	possibleAccessPaths []*accessPath
+}
+
+// accessPath describes one candidate way to read a DataSource: through a single index, or by scanning the table itself.
+type accessPath struct {
+	index  *model.IndexInfo
+	ranges []*ranger.Range
+	// countAfterAccess is the row count after the range seek is applied and before any other filter is evaluated.
+	countAfterAccess float64
+	// countAfterIndex is the row count after the index filters are applied and before the table filters are evaluated.
+	countAfterIndex float64
+	accessConds     []expression.Expression
+	eqCondCount     int
+	indexFilters    []expression.Expression
+	tableFilters    []expression.Expression
+	// isTablePath indicates whether this path is the table path (a table scan) rather than an index path.
+	isTablePath bool
+	// forced means this path was named by a `use index()` / `force index()` hint.
+	forced bool
+}
+
+func (ds *DataSource) deriveTablePathStats(path *accessPath) error {
+	var err error
+	sc := ds.ctx.GetSessionVars().StmtCtx
+	path.countAfterAccess = float64(ds.statisticTable.Count)
+	path.tableFilters = ds.pushedDownConds
+	var pkCol *expression.Column
+	if ds.tableInfo.PKIsHandle {
+		if pkColInfo := ds.tableInfo.GetPkColInfo(); pkColInfo != nil {
+			pkCol = expression.ColInfo2Col(ds.schema.Columns, pkColInfo)
+		}
+	}
+	if pkCol == nil {
+		path.ranges = ranger.FullIntRange(false)
+		return nil
+	}
+	path.ranges = ranger.FullIntRange(mysql.HasUnsignedFlag(pkCol.RetType.Flag))
+	if len(ds.pushedDownConds) == 0 {
+		return nil
+	}
+	path.accessConds, path.tableFilters = ranger.DetachCondsForTableRange(ds.ctx, ds.pushedDownConds, pkCol)
+	path.ranges, err = ranger.BuildTableRange(path.accessConds, sc, pkCol.RetType)
+	if err != nil {
+		return errors.Trace(err)
+	}
+	path.countAfterAccess, err = ds.statisticTable.GetRowCountByIntColumnRanges(sc, pkCol.ID, path.ranges)
+	return errors.Trace(err)
 }
 
-type availableIndices struct {
-	indices          []*model.IndexInfo
-	includeTableScan bool
+func (ds *DataSource) deriveIndexPathStats(path *accessPath) error {
+	var err error
+	sc := ds.ctx.GetSessionVars().StmtCtx
+	path.ranges = ranger.FullRange()
+	path.countAfterAccess = float64(ds.statisticTable.Count)
+	idxCols, lengths := expression.IndexInfo2Cols(ds.schema.Columns, path.index)
+	if len(idxCols) != 0 {
+		path.ranges, path.accessConds, path.indexFilters, path.eqCondCount, err = ranger.DetachCondAndBuildRangeForIndex(ds.ctx, ds.pushedDownConds, idxCols, lengths)
+		if err != nil {
+			return errors.Trace(err)
+		}
+		path.countAfterAccess, err = ds.statisticTable.GetRowCountByIndexRanges(sc, path.index.ID, path.ranges)
+		if err != nil {
+			return errors.Trace(err)
+		}
+		path.indexFilters, path.tableFilters = splitIndexFilterConditions(path.indexFilters, path.index.Columns, ds.tableInfo)
+	} else {
+		path.indexFilters, path.tableFilters = splitIndexFilterConditions(ds.pushedDownConds, path.index.Columns, ds.tableInfo)
+	}
+	path.countAfterIndex = path.countAfterAccess
+	if path.indexFilters != nil {
+		selectivity, err := ds.statisticTable.Selectivity(ds.ctx, path.indexFilters)
+		if err != nil {
+			log.Warnf("An error happened: %v, we have to use the default selectivity", err.Error())
+			selectivity = selectionFactor
+		}
+		path.countAfterIndex = math.Max(path.countAfterAccess*selectivity, ds.statsAfterSelect.count)
+	}
+	return nil
 }
 
 func (ds *DataSource) getPKIsHandleCol() *expression.Column {
diff --git a/plan/optimizer.go b/plan/optimizer.go
index 47e0de2a08035..9c7c01ea7f390 100644
--- a/plan/optimizer.go
+++ b/plan/optimizer.go
@@ -147,7 +147,10 @@ func logicalOptimize(flag uint64, logic LogicalPlan) (LogicalPlan, error) {
 
 func physicalOptimize(logic LogicalPlan) (PhysicalPlan, error) {
 	logic.preparePossibleProperties()
-	logic.deriveStats()
+	_, err := logic.deriveStats()
+	if err != nil {
+		return nil, errors.Trace(err)
+	}
 	t, err := logic.findBestTask(&requiredProp{taskTp: rootTaskType, expectedCnt: math.MaxFloat64})
 	if err != nil {
 		return nil, errors.Trace(err)
diff --git a/plan/physical_plan_builder.go b/plan/physical_plan_builder.go
index 5193412e6a472..cb931305a9699 100644
--- a/plan/physical_plan_builder.go
+++ b/plan/physical_plan_builder.go
@@ -23,7 +23,6 @@ import (
 	"github.com/pingcap/tidb/mysql"
 	"github.com/pingcap/tidb/types"
 	"github.com/pingcap/tidb/util/ranger"
-	log "github.com/sirupsen/logrus"
 )
 
 const (
@@ -197,22 +196,25 @@ func (ds *DataSource) findBestTask(prop *requiredProp) (task, error) {
 		return t, nil
 	}
 
-	indices := ds.availableIndices.indices
-	includeTableScan := ds.availableIndices.includeTableScan
 	t = invalidTask
-	if includeTableScan {
-		t, err = ds.convertToTableScan(prop)
-		if err != nil {
-			return nil, errors.Trace(err)
-		}
-	}
-	if !includeTableScan || len(ds.pushedDownConds) > 0 || len(prop.cols) > 0 {
-		for i, idx := range indices {
-			// TODO: We can also check if the prop matches the index columns.
-			if !ds.relevantIndices[i] && len(prop.cols) == 0 {
-				continue
+
+	for _, path := range ds.possibleAccessPaths {
+		if path.isTablePath {
+			tblTask, err := ds.convertToTableScan(prop, path)
+			if err != nil {
+				return nil, errors.Trace(err)
+			}
+			if tblTask.cost() < t.cost() {
+				t = tblTask
 			}
-			idxTask, err := ds.convertToIndexScan(prop, idx)
+			continue
+		}
+		// We generate a physical plan from this index path if any of the following holds:
+		// the path has at least one access condition, or
+		// there is a required property to match, or
+		// the index was forced by a hint.
+		if len(path.accessConds) > 0 || len(prop.cols) > 0 || path.forced {
+			idxTask, err := ds.convertToIndexScan(prop, path)
 			if err != nil {
 				return nil, errors.Trace(err)
 			}
@@ -221,6 +223,7 @@ func (ds *DataSource) findBestTask(prop *requiredProp) (task, error) {
 			}
 		}
 	}
+
 	ds.storeTask(prop, t)
 	return t, nil
 }
@@ -271,7 +274,9 @@ func (ds *DataSource) forceToIndexScan(idx *model.IndexInfo, remainedConds []exp
 		cop.tablePlan = ts
 	}
 	is.initSchema(ds.id, idx, cop.tablePlan != nil)
-	is.addPushedDownSelection(cop, ds, math.MaxFloat64)
+	indexConds, tblConds := splitIndexFilterConditions(remainedConds, idx.Columns, ds.tableInfo)
+	path := &accessPath{indexFilters: indexConds, tableFilters: tblConds, countAfterIndex: math.MaxFloat64}
+	is.addPushedDownSelection(cop, ds, math.MaxFloat64, path)
 	t := finishCopTask(ds.ctx, cop)
 	return t.plan()
 }
@@ -289,7 +294,8 @@ func (ts *PhysicalTableScan) appendExtraHandleCol(ds *DataSource) {
 }
 
 // convertToIndexScan converts the DataSource to index scan with idx.
-func (ds *DataSource) convertToIndexScan(prop *requiredProp, idx *model.IndexInfo) (task task, err error) {
+func (ds *DataSource) convertToIndexScan(prop *requiredProp, path *accessPath) (task task, err error) {
+	idx := path.index
 	is := PhysicalIndexScan{
 		Table:            ds.tableInfo,
 		TableAsName:      ds.TableAsName,
@@ -302,26 +308,10 @@ func (ds *DataSource) convertToIndexScan(prop *requiredProp, idx *model.IndexInf
 	if statsTbl.Indices[idx.ID] != nil {
 		is.Hist = &statsTbl.Indices[idx.ID].Histogram
 	}
-	rowCount := float64(statsTbl.Count)
-	sc := ds.ctx.GetSessionVars().StmtCtx
-	idxCols, colLengths := expression.IndexInfo2Cols(ds.Schema().Columns, idx)
 	is.Ranges = ranger.FullRange()
 	eqCount := 0
-	if len(ds.pushedDownConds) > 0 {
-		is.conditions = ds.pushedDownConds
-		if len(idxCols) > 0 {
-			is.Ranges, is.AccessCondition, is.filterCondition, eqCount, err = ranger.DetachCondAndBuildRangeForIndex(ds.ctx, ds.pushedDownConds, idxCols, colLengths)
-			if err != nil {
-				return nil, errors.Trace(err)
-			}
-			rowCount, err = statsTbl.GetRowCountByIndexRanges(sc, is.Index.ID, is.Ranges)
-			if err != nil {
-				return nil, errors.Trace(err)
-			}
-		} else {
-			is.filterCondition = ds.pushedDownConds
-		}
-	}
+	is.AccessCondition, is.Ranges, is.filterCondition, eqCount = path.accessConds, path.ranges, path.indexFilters, path.eqCondCount
+	rowCount := path.countAfterAccess
 	cop := &copTask{indexPlan: is}
 	if !isCoveringIndex(is.Columns, is.Index.Columns, is.Table.PKIsHandle) {
 		// On this way, it's double read case.
@@ -353,12 +343,8 @@ func (ds *DataSource) convertToIndexScan(prop *requiredProp, idx *model.IndexInf
 	// Only use expectedCnt when it's smaller than the count we calculated.
 	// e.g. IndexScan(count1)->After Filter(count2). The `ds.statsAfterSelect.count` is count2. count1 is the one we need to calculate
 	// If expectedCnt and count2 are both zero and we go into the below `if` block, the count1 will be set to zero though it's shouldn't be.
-	if matchProperty && prop.expectedCnt < ds.statsAfterSelect.count {
-		selectivity, err := statsTbl.Selectivity(ds.ctx, is.filterCondition)
-		if err != nil {
-			log.Warnf("An error happened: %v, we have to use the default selectivity", err.Error())
-			selectivity = selectionFactor
-		}
+	if matchProperty && prop.expectedCnt < path.countAfterIndex {
+		selectivity := path.countAfterIndex / path.countAfterAccess
 		rowCount = math.Min(prop.expectedCnt/selectivity, rowCount)
 	}
 	is.stats = ds.stats.scaleByExpectCnt(rowCount)
@@ -374,7 +360,7 @@ func (ds *DataSource) convertToIndexScan(prop *requiredProp, idx *model.IndexInf
 		}
 		cop.keepOrder = true
 		is.KeepOrder = true
-		is.addPushedDownSelection(cop, ds, prop.expectedCnt)
+		is.addPushedDownSelection(cop, ds, prop.expectedCnt, path)
 	} else {
 		expectedCnt := math.MaxFloat64
 		if prop.isEmpty() {
@@ -382,7 +368,7 @@ func (ds *DataSource) convertToIndexScan(prop *requiredProp, idx *model.IndexInf
 		} else {
 			return invalidTask, nil
 		}
-		is.addPushedDownSelection(cop, ds, expectedCnt)
+		is.addPushedDownSelection(cop, ds, expectedCnt, path)
 	}
 	if prop.taskTp == rootTaskType {
 		task = finishCopTask(ds.ctx, task)
@@ -413,29 +399,23 @@ func (is *PhysicalIndexScan) initSchema(id int, idx *model.IndexInfo, isDoubleRe
 	is.SetSchema(expression.NewSchema(indexCols...))
 }
 
-func (is *PhysicalIndexScan) addPushedDownSelection(copTask *copTask, p *DataSource, expectedCnt float64) {
+func (is *PhysicalIndexScan) addPushedDownSelection(copTask *copTask, p *DataSource, expectedCnt float64, path *accessPath) {
 	// Add filter condition to table plan now.
-	if len(is.filterCondition) > 0 {
-		var indexConds, tableConds []expression.Expression
-		if copTask.tablePlan != nil {
-			indexConds, tableConds = splitIndexFilterConditions(is.filterCondition, is.Index.Columns, is.Table)
-		} else {
-			indexConds = is.filterCondition
-		}
-		if indexConds != nil {
-			indexSel := PhysicalSelection{Conditions: indexConds}.init(is.ctx,
-				p.getStatsByFilter(append(is.AccessCondition, indexConds...)).scaleByExpectCnt(expectedCnt))
-			indexSel.SetChildren(is)
-			copTask.indexPlan = indexSel
-			copTask.cst += copTask.count() * cpuFactor
-		}
-		if tableConds != nil {
-			copTask.finishIndexPlan()
-			tableSel := PhysicalSelection{Conditions: tableConds}.init(is.ctx, p.statsAfterSelect.scaleByExpectCnt(expectedCnt))
-			tableSel.SetChildren(copTask.tablePlan)
-			copTask.tablePlan = tableSel
-			copTask.cst += copTask.count() * cpuFactor
-		}
+	indexConds, tableConds := path.indexFilters, path.tableFilters
+	if indexConds != nil {
+		stats := &statsInfo{count: path.countAfterIndex}
+		indexSel := PhysicalSelection{Conditions: indexConds}.init(is.ctx,
+			stats.scaleByExpectCnt(expectedCnt))
+		indexSel.SetChildren(is)
+		copTask.indexPlan = indexSel
+		copTask.cst += copTask.count() * cpuFactor
+	}
+	if tableConds != nil {
+		copTask.finishIndexPlan()
+		tableSel := PhysicalSelection{Conditions: tableConds}.init(is.ctx, p.statsAfterSelect.scaleByExpectCnt(expectedCnt))
+		tableSel.SetChildren(copTask.tablePlan)
+		copTask.tablePlan = tableSel
+		copTask.cst += copTask.count() * cpuFactor
 	}
 }
 
@@ -519,7 +499,7 @@ func (ds *DataSource) forceToTableScan(pk *expression.Column) PhysicalPlan {
 }
 
 // convertToTableScan converts the DataSource to table scan.
-func (ds *DataSource) convertToTableScan(prop *requiredProp) (task task, err error) {
+func (ds *DataSource) convertToTableScan(prop *requiredProp, path *accessPath) (task task, err error) {
 	// It will be handled in convertToIndexScan.
 	if prop.taskTp == copDoubleReadTaskType {
 		return &copTask{cst: math.MaxFloat64}, nil
@@ -532,7 +512,6 @@ func (ds *DataSource) convertToTableScan(prop *requiredProp) (task task, err err
 		DBName:      ds.DBName,
 	}.init(ds.ctx)
 	ts.SetSchema(ds.schema)
-	sc := ds.ctx.GetSessionVars().StmtCtx
 	var pkCol *expression.Column
 	if ts.Table.PKIsHandle {
 		if pkColInfo := ts.Table.GetPkColInfo(); pkColInfo != nil {
@@ -542,29 +521,9 @@ func (ds *DataSource) convertToTableScan(prop *requiredProp) (task task, err err
 			}
 		}
 	}
-	if pkCol != nil {
-		ts.Ranges = ranger.FullIntRange(mysql.HasUnsignedFlag(pkCol.RetType.Flag))
-	} else {
-		ts.Ranges = ranger.FullIntRange(false)
-	}
-	statsTbl := ds.statisticTable
-	rowCount := float64(statsTbl.Count)
-	if len(ds.pushedDownConds) > 0 {
-		if pkCol != nil {
-			ts.AccessCondition, ts.filterCondition = ranger.DetachCondsForTableRange(ds.ctx, ds.pushedDownConds, pkCol)
-			ts.Ranges, err = ranger.BuildTableRange(ts.AccessCondition, sc, pkCol.RetType)
-			if err != nil {
-				return nil, errors.Trace(err)
-			}
-			// TODO: We can use ds.getStatsByFilter(accessConditions).
-			rowCount, err = statsTbl.GetRowCountByIntColumnRanges(sc, pkCol.ID, ts.Ranges)
-			if err != nil {
-				return nil, errors.Trace(err)
-			}
-		} else {
-			ts.filterCondition = ds.pushedDownConds
-		}
-	}
+	ts.Ranges = path.ranges
+	ts.AccessCondition, ts.filterCondition = path.accessConds, path.tableFilters
+	rowCount := path.countAfterAccess
 	copTask := &copTask{
 		tablePlan:         ts,
 		indexPlanFinished: true,
@@ -575,11 +534,7 @@ func (ds *DataSource) convertToTableScan(prop *requiredProp) (task task, err err
 	// e.g. IndexScan(count1)->After Filter(count2). The `ds.statsAfterSelect.count` is count2. count1 is the one we need to calculate
 	// If expectedCnt and count2 are both zero and we go into the below `if` block, the count1 will be set to zero though it's shouldn't be.
 	if matchProperty && prop.expectedCnt < ds.statsAfterSelect.count {
-		selectivity, err := statsTbl.Selectivity(ds.ctx, ts.filterCondition)
-		if err != nil {
-			log.Warnf("An error happened: %v, we have to use the default selectivity", err.Error())
-			selectivity = selectionFactor
-		}
+		selectivity := ds.statsAfterSelect.count / rowCount
 		rowCount = math.Min(prop.expectedCnt/selectivity, rowCount)
 	}
 	ts.stats = ds.stats.scaleByExpectCnt(rowCount)
diff --git a/plan/plan.go b/plan/plan.go
index 5296b44c35b81..84344be0f5417 100644
--- a/plan/plan.go
+++ b/plan/plan.go
@@ -161,7 +161,7 @@ type LogicalPlan interface {
 	pushDownTopN(topN *LogicalTopN) LogicalPlan
 
 	// deriveStats derives statistic info between plans.
-	deriveStats() *statsInfo
+	deriveStats() (*statsInfo, error)
 
 	// preparePossibleProperties is only used for join and aggregation. Like group by a,b,c, all permutation of (a,b,c) is
 	// valid, but the ordered indices in leaf plan is limited. So we can get all possible order properties by a pre-walking.
diff --git a/plan/planbuilder.go b/plan/planbuilder.go
index 837c7743dec4a..52fd021569084 100644
--- a/plan/planbuilder.go
+++ b/plan/planbuilder.go
@@ -317,17 +317,27 @@ func (b *planBuilder) detectSelectAgg(sel *ast.SelectStmt) bool {
 	return false
 }
 
-func getAvailableIndices(indexHints []*ast.IndexHint, tableInfo *model.TableInfo) (*availableIndices, error) {
-	publicIndices := make([]*model.IndexInfo, 0, len(tableInfo.Indices))
-	for _, index := range tableInfo.Indices {
+func getPathByIndexName(paths []*accessPath, idxName model.CIStr) *accessPath {
+	for _, path := range paths {
+		if path.index.Name.L == idxName.L {
+			return path
+		}
+	}
+	return nil
+}
+
+func getPossibleAccessPaths(indexHints []*ast.IndexHint, tblInfo *model.TableInfo) ([]*accessPath, error) {
+	publicPaths := make([]*accessPath, 0, len(tblInfo.Indices)+1)
+	publicPaths = append(publicPaths, &accessPath{isTablePath: true})
+	for _, index := range tblInfo.Indices {
 		if index.State == model.StatePublic {
-			publicIndices = append(publicIndices, index)
+			publicPaths = append(publicPaths, &accessPath{index: index})
 		}
 	}
 
 	hasScanHint, hasUseOrForce := false, false
-	available := make([]*model.IndexInfo, 0, len(indexHints))
-	ignored := make([]*model.IndexInfo, 0, len(indexHints))
+	available := make([]*accessPath, 0, len(publicPaths))
+	ignored := make([]*accessPath, 0, len(publicPaths))
 	for _, hint := range indexHints {
 		if hint.HintScope != ast.HintForScan {
 			continue
@@ -335,47 +345,48 @@ func getAvailableIndices(indexHints []*ast.IndexHint, tableInfo *model.TableInfo
 
 		hasScanHint = true
 		for _, idxName := range hint.IndexNames {
-			idx := findIndexByName(publicIndices, idxName)
-			if idx == nil {
-				return nil, ErrKeyDoesNotExist.GenByArgs(idxName, tableInfo.Name)
+			path := getPathByIndexName(publicPaths[1:], idxName)
+			if path == nil {
+				return nil, ErrKeyDoesNotExist.GenByArgs(idxName, tblInfo.Name)
 			}
 			if hint.HintType == ast.HintIgnore {
 				// Collect all the ignored index hints.
-				ignored = append(ignored, idx)
+				ignored = append(ignored, path)
 				continue
 			}
 			// Currently we don't distinguish between "FORCE" and "USE" because
 			// our cost estimation is not reliable.
 			hasUseOrForce = true
-			available = append(available, idx)
+			path.forced = true
+			available = append(available, path)
 		}
 	}
 
-	if !hasScanHint {
-		return &availableIndices{publicIndices, true}, nil
-	}
-	if !hasUseOrForce {
-		available = removeIgnoredIndices(publicIndices, ignored)
-		return &availableIndices{available, true}, nil
+	if !hasScanHint || !hasUseOrForce {
+		available = publicPaths
 	}
 
-	available = removeIgnoredIndices(available, ignored)
+	available = removeIgnoredPaths(available, ignored)
+
 	// If we have got "FORCE" or "USE" index hint but got no available index,
 	// we have to use table scan.
-	return &availableIndices{available, len(available) == 0}, nil
+	if len(available) == 0 {
+		available = append(available, &accessPath{isTablePath: true})
+	}
+	return available, nil
 }
 
-func removeIgnoredIndices(indices, ignores []*model.IndexInfo) []*model.IndexInfo {
-	if len(ignores) == 0 {
-		return indices
+func removeIgnoredPaths(paths, ignoredPaths []*accessPath) []*accessPath {
+	if len(ignoredPaths) == 0 {
+		return paths
 	}
-	var remainedIndices []*model.IndexInfo
-	for _, index := range indices {
-		if findIndexByName(ignores, index.Name) == nil {
-			remainedIndices = append(remainedIndices, index)
+	remainedPaths := make([]*accessPath, 0, len(paths))
+	for _, path := range paths {
+		if path.isTablePath || getPathByIndexName(ignoredPaths, path.index.Name) == nil {
+			remainedPaths = append(remainedPaths, path)
 		}
 	}
-	return remainedIndices
+	return remainedPaths
 }
 
 func findIndexByName(indices []*model.IndexInfo, name model.CIStr) *model.IndexInfo {
diff --git a/plan/property_cols_prune.go b/plan/property_cols_prune.go
index 5449ed87ae3c4..1333c9610f255 100644
--- a/plan/property_cols_prune.go
+++ b/plan/property_cols_prune.go
@@ -18,35 +18,17 @@ import (
 )
 
 func (ds *DataSource) preparePossibleProperties() [][]*expression.Column {
-	indices := ds.availableIndices.indices
-	includeTS := ds.availableIndices.includeTableScan
+	result := make([][]*expression.Column, 0, len(ds.possibleAccessPaths))
 
-	result := make([][]*expression.Column, 0, len(indices))
-	ds.relevantIndices = make([]bool, len(indices))
-
-	if includeTS {
-		col := ds.getPKIsHandleCol()
-		if col != nil {
-			result = append(result, []*expression.Column{col})
-		}
-
-		cols := expression.ExtractColumnsFromExpressions(make([]*expression.Column, 0, 10), ds.pushedDownConds, nil)
-		colsSet := make(map[string]struct{}, len(cols))
-		for _, col := range cols {
-			colsSet[col.ColName.L] = struct{}{}
-		}
-
-		for i, idx := range indices {
-			_, ok := colsSet[idx.Columns[0].Name.L]
-			ds.relevantIndices[i] = ok
-		}
-	} else {
-		for i := range ds.relevantIndices {
-			ds.relevantIndices[i] = true
+	for _, path := range ds.possibleAccessPaths {
+		if path.isTablePath {
+			col := ds.getPKIsHandleCol()
+			if col != nil {
+				result = append(result, []*expression.Column{col})
+			}
+			continue
 		}
-	}
-	for _, idx := range indices {
-		cols, _ := expression.IndexInfo2Cols(ds.schema.Columns, idx)
+		cols, _ := expression.IndexInfo2Cols(ds.schema.Columns, path.index)
 		if len(cols) > 0 {
 			result = append(result, cols)
 		}
diff --git a/plan/stats.go b/plan/stats.go
index b1d7ea7081264..c10b133f94f91 100644
--- a/plan/stats.go
+++ b/plan/stats.go
@@ -17,6 +17,7 @@ import (
 	"fmt"
 	"math"
 
+	"github.com/juju/errors"
 	"github.com/pingcap/tidb/expression"
 	log "github.com/sirupsen/logrus"
 )
@@ -62,7 +63,7 @@ func (p *basePhysicalPlan) StatsInfo() *statsInfo {
 	return p.stats
 }
 
-func (p *LogicalTableDual) deriveStats() *statsInfo {
+func (p *LogicalTableDual) deriveStats() (*statsInfo, error) {
 	profile := &statsInfo{
 		count:       float64(p.RowCount),
 		cardinality: make([]float64, p.Schema().Len()),
@@ -71,17 +72,18 @@ func (p *LogicalTableDual) deriveStats() *statsInfo {
 		profile.cardinality[i] = float64(p.RowCount)
 	}
 	p.stats = profile
-	return p.stats
+	return p.stats, nil
 }
 
-func (p *baseLogicalPlan) deriveStats() *statsInfo {
+func (p *baseLogicalPlan) deriveStats() (*statsInfo, error) {
 	if len(p.children) > 1 {
 		panic("LogicalPlans with more than one child should implement their own deriveStats().")
 	}
 
 	if len(p.children) == 1 {
-		p.stats = p.children[0].deriveStats()
-		return p.stats
+		var err error
+		p.stats, err = p.children[0].deriveStats()
+		return p.stats, errors.Trace(err)
 	}
 
 	profile := &statsInfo{
@@ -92,7 +94,7 @@ func (p *baseLogicalPlan) deriveStats() *statsInfo {
 		profile.cardinality[i] = float64(1)
 	}
 	p.stats = profile
-	return profile
+	return profile, nil
 }
 
 func (ds *DataSource) getStatsByFilter(conds expression.CNFExprs) *statsInfo {
@@ -118,37 +120,59 @@ func (ds *DataSource) getStatsByFilter(conds expression.CNFExprs) *statsInfo {
 	return profile.scale(selectivity)
 }
 
-func (ds *DataSource) deriveStats() *statsInfo {
+func (ds *DataSource) deriveStats() (*statsInfo, error) {
 	// PushDownNot here can convert query 'not (a != 1)' to 'a = 1'.
 	for i, expr := range ds.pushedDownConds {
 		ds.pushedDownConds[i] = expression.PushDownNot(nil, expr, false)
 	}
 	ds.statsAfterSelect = ds.getStatsByFilter(ds.pushedDownConds)
-	return ds.statsAfterSelect
+	for _, path := range ds.possibleAccessPaths {
+		if path.isTablePath {
+			err := ds.deriveTablePathStats(path)
+			if err != nil {
+				return nil, errors.Trace(err)
+			}
+			continue
+		}
+		err := ds.deriveIndexPathStats(path)
+		if err != nil {
+			return nil, errors.Trace(err)
+		}
+	}
+	return ds.statsAfterSelect, nil
 }
 
-func (p *LogicalSelection) deriveStats() *statsInfo {
-	childProfile := p.children[0].deriveStats()
+func (p *LogicalSelection) deriveStats() (*statsInfo, error) {
+	childProfile, err := p.children[0].deriveStats()
+	if err != nil {
+		return nil, errors.Trace(err)
+	}
 	p.stats = childProfile.scale(selectionFactor)
-	return p.stats
+	return p.stats, nil
 }
 
-func (p *LogicalUnionAll) deriveStats() *statsInfo {
+func (p *LogicalUnionAll) deriveStats() (*statsInfo, error) {
 	p.stats = &statsInfo{
 		cardinality: make([]float64, p.Schema().Len()),
 	}
 	for _, child := range p.children {
-		childProfile := child.deriveStats()
+		childProfile, err := child.deriveStats()
+		if err != nil {
+			return nil, errors.Trace(err)
+		}
 		p.stats.count += childProfile.count
 		for i := range p.stats.cardinality {
 			p.stats.cardinality[i] += childProfile.cardinality[i]
 		}
 	}
-	return p.stats
+	return p.stats, nil
 }
 
-func (p *LogicalLimit) deriveStats() *statsInfo {
-	childProfile := p.children[0].deriveStats()
+func (p *LogicalLimit) deriveStats() (*statsInfo, error) {
+	childProfile, err := p.children[0].deriveStats()
+	if err != nil {
+		return nil, errors.Trace(err)
+	}
 	p.stats = &statsInfo{
 		count:       math.Min(float64(p.Count), childProfile.count),
 		cardinality: make([]float64, len(childProfile.cardinality)),
@@ -156,11 +180,14 @@ func (p *LogicalLimit) deriveStats() *statsInfo {
 	for i := range p.stats.cardinality {
 		p.stats.cardinality[i] = math.Min(childProfile.cardinality[i], p.stats.count)
 	}
-	return p.stats
+	return p.stats, nil
 }
 
-func (lt *LogicalTopN) deriveStats() *statsInfo {
-	childProfile := lt.children[0].deriveStats()
+func (lt *LogicalTopN) deriveStats() (*statsInfo, error) {
+	childProfile, err := lt.children[0].deriveStats()
+	if err != nil {
+		return nil, errors.Trace(err)
+	}
 	lt.stats = &statsInfo{
 		count:       math.Min(float64(lt.Count), childProfile.count),
 		cardinality: make([]float64, len(childProfile.cardinality)),
@@ -168,7 +195,7 @@ func (lt *LogicalTopN) deriveStats() *statsInfo {
 	for i := range lt.stats.cardinality {
 		lt.stats.cardinality[i] = math.Min(childProfile.cardinality[i], lt.stats.count)
 	}
-	return lt.stats
+	return lt.stats, nil
 }
 
 // getCardinality will return the cardinality of a couple of columns. We simply return the max one, because we cannot know
@@ -187,8 +214,11 @@ func getCardinality(cols []*expression.Column, schema *expression.Schema, profil
 	return cardinality
 }
 
-func (p *LogicalProjection) deriveStats() *statsInfo {
-	childProfile := p.children[0].deriveStats()
+func (p *LogicalProjection) deriveStats() (*statsInfo, error) {
+	childProfile, err := p.children[0].deriveStats()
+	if err != nil {
+		return nil, errors.Trace(err)
+	}
 	p.stats = &statsInfo{
 		count:       childProfile.count,
 		cardinality: make([]float64, len(p.Exprs)),
@@ -197,11 +227,14 @@ func (p *LogicalProjection) deriveStats() *statsInfo {
 		cols := expression.ExtractColumns(expr)
 		p.stats.cardinality[i] = getCardinality(cols, p.children[0].Schema(), childProfile)
 	}
-	return p.stats
+	return p.stats, nil
 }
 
-func (la *LogicalAggregation) deriveStats() *statsInfo {
-	childProfile := la.children[0].deriveStats()
+func (la *LogicalAggregation) deriveStats() (*statsInfo, error) {
+	childProfile, err := la.children[0].deriveStats()
+	if err != nil {
+		return nil, errors.Trace(err)
+	}
 	gbyCols := make([]*expression.Column, 0, len(la.GroupByItems))
 	for _, gbyExpr := range la.GroupByItems {
 		cols := expression.ExtractColumns(gbyExpr)
@@ -217,7 +250,7 @@ func (la *LogicalAggregation) deriveStats() *statsInfo {
 		la.stats.cardinality[i] = cardinality
 	}
 	la.inputCount = childProfile.count
-	return la.stats
+	return la.stats, nil
 }
 
 // deriveStats prepares statsInfo.
@@ -227,9 +260,15 @@ func (la *LogicalAggregation) deriveStats() *statsInfo {
 // N(s) stands for the number of rows in relation s. V(s.key) means the cardinality of join key in s.
 // This is a quite simple strategy: We assume every bucket of relation which will participate join has the same number of rows, and apply cross join for
 // every matched bucket.
-func (p *LogicalJoin) deriveStats() *statsInfo {
-	leftProfile := p.children[0].deriveStats()
-	rightProfile := p.children[1].deriveStats()
+func (p *LogicalJoin) deriveStats() (*statsInfo, error) {
+	leftProfile, err := p.children[0].deriveStats()
+	if err != nil {
+		return nil, errors.Trace(err)
+	}
+	rightProfile, err := p.children[1].deriveStats()
+	if err != nil {
+		return nil, errors.Trace(err)
+	}
 	if p.JoinType == SemiJoin || p.JoinType == AntiSemiJoin {
 		p.stats = &statsInfo{
 			count:       leftProfile.count * selectionFactor,
@@ -238,7 +277,7 @@ func (p *LogicalJoin) deriveStats() *statsInfo {
 		for i := range p.stats.cardinality {
 			p.stats.cardinality[i] = leftProfile.cardinality[i] * selectionFactor
 		}
-		return p.stats
+		return p.stats, nil
 	}
 	if p.JoinType == LeftOuterSemiJoin || p.JoinType == AntiLeftOuterSemiJoin {
 		p.stats = &statsInfo{
@@ -247,14 +286,14 @@ func (p *LogicalJoin) deriveStats() *statsInfo {
 		}
 		copy(p.stats.cardinality, leftProfile.cardinality)
 		p.stats.cardinality[len(p.stats.cardinality)-1] = 2.0
-		return p.stats
+		return p.stats, nil
 	}
 	if 0 == len(p.EqualConditions) {
 		p.stats = &statsInfo{
 			count:       leftProfile.count * rightProfile.count,
 			cardinality: append(leftProfile.cardinality, rightProfile.cardinality...),
 		}
-		return p.stats
+		return p.stats, nil
 	}
 	leftKeys := make([]*expression.Column, 0, len(p.EqualConditions))
 	rightKeys := make([]*expression.Column, 0, len(p.EqualConditions))
@@ -280,12 +319,18 @@ func (p *LogicalJoin) deriveStats() *statsInfo {
 		count:       count,
 		cardinality: cardinality,
 	}
-	return p.stats
+	return p.stats, nil
 }
 
-func (la *LogicalApply) deriveStats() *statsInfo {
-	leftProfile := la.children[0].deriveStats()
-	_ = la.children[1].deriveStats()
+func (la *LogicalApply) deriveStats() (*statsInfo, error) {
+	leftProfile, err := la.children[0].deriveStats()
+	if err != nil {
+		return nil, errors.Trace(err)
+	}
+	_, err = la.children[1].deriveStats()
+	if err != nil {
+		return nil, errors.Trace(err)
+	}
 	la.stats = &statsInfo{
 		count:       leftProfile.count,
 		cardinality: make([]float64, la.schema.Len()),
@@ -298,7 +343,7 @@ func (la *LogicalApply) deriveStats() *statsInfo {
 			la.stats.cardinality[i] = leftProfile.count
 		}
 	}
-	return la.stats
+	return la.stats, nil
 }
 
 // Exists and MaxOneRow produce at most one row, so we set the count of stats one.
@@ -313,14 +358,20 @@ func getSingletonStats(len int) *statsInfo {
 	return ret
 }
 
-func (p *LogicalExists) deriveStats() *statsInfo {
-	p.children[0].deriveStats()
+func (p *LogicalExists) deriveStats() (*statsInfo, error) {
+	_, err := p.children[0].deriveStats()
+	if err != nil {
+		return nil, errors.Trace(err)
+	}
 	p.stats = getSingletonStats(1)
-	return p.stats
+	return p.stats, nil
 }
 
-func (p *LogicalMaxOneRow) deriveStats() *statsInfo {
-	p.children[0].deriveStats()
+func (p *LogicalMaxOneRow) deriveStats() (*statsInfo, error) {
+	_, err := p.children[0].deriveStats()
+	if err != nil {
+		return nil, errors.Trace(err)
+	}
 	p.stats = getSingletonStats(p.Schema().Len())
-	return p.stats
+	return p.stats, nil
 }