pingcap · zz-jason · Apr 28, 2018 · Apr 23, 2018 · Apr 23, 2018 · Apr 24, 2018
diff --git a/plan/build_key_info.go b/plan/build_key_info.go
@@ -174,8 +174,11 @@ func (p *LogicalJoin) buildKeyInfo() {
 func (ds *DataSource) buildKeyInfo() {
 	ds.schema.Keys = nil
 	ds.baseLogicalPlan.buildKeyInfo()
-	indices := ds.availableIndices.indices
-	for _, idx := range indices {
+	for _, path := range ds.possibleIndexPaths {
+		if path.isRowID {
+			continue
+		}
+		idx := path.index
 		if !idx.Unique {
 			continue
 		}

diff --git a/plan/gen_physical_plans.go b/plan/gen_physical_plans.go
@@ -256,9 +256,9 @@ func (p *LogicalJoin) getIndexJoinByOuterIdx(prop *requiredProp, outerIdx int) [
 	if !ok {
 		return nil
 	}
-	indices := x.availableIndices.indices
-	includeTableScan := x.availableIndices.includeTableScan
-	if includeTableScan && len(innerJoinKeys) == 1 {
+	indexPaths := x.possibleIndexPaths
+	if len(x.possibleIndexPaths) > 0 && x.possibleIndexPaths[0].isRowID {
+		indexPaths = indexPaths[1:]
 		pkCol := x.getPKIsHandleCol()
 		if pkCol != nil && innerJoinKeys[0].Equal(nil, pkCol) {
 			innerPlan := x.forceToTableScan(pkCol)
@@ -272,7 +272,8 @@ func (p *LogicalJoin) getIndexJoinByOuterIdx(prop *requiredProp, outerIdx int) [
 		remainedOfBest []expression.Expression
 		keyOff2IdxOff  []int
 	)
-	for _, indexInfo := range indices {
+	for _, path := range indexPaths {
+		indexInfo := path.index
 		ranges, remained, tmpKeyOff2IdxOff := p.buildRangeForIndexJoin(indexInfo, x, innerJoinKeys)
 		// We choose the index by the number of used columns of the range, the much the better.
 		// Notice that there may be the cases like `t1.a=t2.a and b > 2 and b < 1`. So ranges can be nil though the conditions are valid.

diff --git a/plan/logical_plan_builder.go b/plan/logical_plan_builder.go
@@ -1671,7 +1671,7 @@ func (b *planBuilder) buildDataSource(tn *ast.TableName) LogicalPlan {
 	tableInfo := tbl.Meta()
 	b.visitInfo = appendVisitInfo(b.visitInfo, mysql.SelectPriv, dbName.L, tableInfo.Name.L, "")
 
-	availableIdxes, err := getAvailableIndices(tn.IndexHints, tableInfo)
+	possiblePaths, err := getPossibleIndexPaths(tn.IndexHints, tableInfo)
 	if err != nil {
 		b.err = errors.Trace(err)
 		return nil
@@ -1685,12 +1685,12 @@ func (b *planBuilder) buildDataSource(tn *ast.TableName) LogicalPlan {
 	}
 
 	ds := DataSource{
-		DBName:           dbName,
-		tableInfo:        tableInfo,
-		statisticTable:   b.getStatsTable(tableInfo),
-		indexHints:       tn.IndexHints,
-		availableIndices: availableIdxes,
-		Columns:          make([]*model.ColumnInfo, 0, len(columns)),
+		DBName:             dbName,
+		tableInfo:          tableInfo,
+		statisticTable:     b.getStatsTable(tableInfo),
+		indexHints:         tn.IndexHints,
+		possibleIndexPaths: possiblePaths,
+		Columns:            make([]*model.ColumnInfo, 0, len(columns)),
 	}.init(b.ctx)
 
 	var handleCol *expression.Column

diff --git a/plan/logical_plans.go b/plan/logical_plans.go
@@ -21,6 +21,7 @@ import (
 	"github.com/pingcap/tidb/mysql"
 	"github.com/pingcap/tidb/statistics"
 	"github.com/pingcap/tidb/types"
+	"github.com/pingcap/tidb/util/ranger"
 )
 
 var (
@@ -299,13 +300,23 @@ type DataSource struct {
 
 	statisticTable *statistics.Table
 
-	// availableIndices is used for storing result of availableIndices function.
-	availableIndices *availableIndices
+	// possibleIndexPaths stores all the possible index paths for physical plan, including table scan.
+	// Please make sure table scan is always the first element.
+	possibleIndexPaths []*indexPath
 }
 
-type availableIndices struct {
-	indices          []*model.IndexInfo
-	includeTableScan bool
+type indexPath struct {
+	index            *model.IndexInfo        // index scanned by this path; callers here do not read it when isRowID is set
+	ranges           []*ranger.NewRange      // scan ranges, copied into the physical scan's Ranges
+	countAfterAccess float64                 // row count used as the scan's estimate; presumably rows left after accessConds — TODO confirm
+	countAfterIndex  float64                 // row count used for the index-side selection's stats
+	accessConds      []expression.Expression // becomes the physical scan's AccessCondition
+	eqCondCount      int                     // read into eqCount by convertToIndexScan
+	indexFilters     []expression.Expression // conditions for the selection placed on the index plan
+	tableFilters     []expression.Expression // conditions for the selection placed on the table plan
+	filterUnmatched  bool                    // NOTE(review): not read in the visible hunks; meaning to be confirmed
+	isRowID          bool                    // true when this path is the table scan rather than a secondary index
+	forced           bool                    // makes findBestTask try this index even with no access conds or required cols
+}
 
 func (ds *DataSource) getPKIsHandleCol() *expression.Column {

diff --git a/plan/optimizer.go b/plan/optimizer.go
@@ -147,7 +147,10 @@ func logicalOptimize(flag uint64, logic LogicalPlan) (LogicalPlan, error) {
 
 func physicalOptimize(logic LogicalPlan) (PhysicalPlan, error) {
 	logic.preparePossibleProperties()
-	logic.deriveStats()
+	_, err := logic.deriveStats()
+	if err != nil {
+		return nil, errors.Trace(err)
+	}
 	t, err := logic.findBestTask(&requiredProp{taskTp: rootTaskType, expectedCnt: math.MaxFloat64})
 	if err != nil {
 		return nil, errors.Trace(err)

diff --git a/plan/physical_plan_builder.go b/plan/physical_plan_builder.go
@@ -24,7 +24,6 @@ import (
 	"github.com/pingcap/tidb/mysql"
 	"github.com/pingcap/tidb/types"
 	"github.com/pingcap/tidb/util/ranger"
-	log "github.com/sirupsen/logrus"
 )
 
 const (
@@ -202,30 +201,30 @@ func (ds *DataSource) findBestTask(prop *requiredProp) (task, error) {
 		return t, nil
 	}
 
-	indices := ds.availableIndices.indices
-	includeTableScan := ds.availableIndices.includeTableScan
 	t = invalidTask
-	if includeTableScan {
-		t, err = ds.convertToTableScan(prop)
-		if err != nil {
-			return nil, errors.Trace(err)
-		}
-	}
-	if !includeTableScan || len(ds.pushedDownConds) > 0 || len(prop.cols) > 0 {
-		for i, idx := range indices {
-			// TODO: We can also check if the prop matches the index columns.
-			if !ds.relevantIndices[i] && len(prop.cols) == 0 {
-				continue
-			}
-			idxTask, err := ds.convertToIndexScan(prop, idx)
+
+	for _, path := range ds.possibleIndexPaths {
+		if path.isRowID {
+			tblTask, err := ds.convertToTableScan(prop, path)
 			if err != nil {
 				return nil, errors.Trace(err)
 			}
-			if idxTask.cost() < t.cost() {
-				t = idxTask
+			if tblTask.cost() < t.cost() {
+				t = tblTask
+			}
+		} else {
+			if len(path.accessConds) > 0 || len(prop.cols) > 0 || path.forced {
+				idxTask, err := ds.convertToIndexScan(prop, path)
+				if err != nil {
+					return nil, errors.Trace(err)
+				}
+				if idxTask.cost() < t.cost() {
+					t = idxTask
+				}
 			}
 		}
 	}
+
 	ds.storeTask(prop, t)
 	return t, nil
 }
@@ -276,7 +275,9 @@ func (ds *DataSource) forceToIndexScan(idx *model.IndexInfo, remainedConds []exp
 		cop.tablePlan = ts
 	}
 	is.initSchema(ds.id, idx, cop.tablePlan != nil)
-	is.addPushedDownSelection(cop, ds, math.MaxFloat64)
+	indexConds, tblConds := splitIndexFilterConditions(remainedConds, idx.Columns, ds.tableInfo)
+	path := &indexPath{indexFilters: indexConds, tableFilters: tblConds, countAfterIndex: math.MaxFloat64}
+	is.addPushedDownSelection(cop, ds, math.MaxFloat64, path)
 	t := finishCopTask(ds.ctx, cop)
 	return t.plan()
 }
@@ -294,7 +295,8 @@ func (ts *PhysicalTableScan) appendExtraHandleCol(ds *DataSource) {
 }
 
 // convertToIndexScan converts the DataSource to index scan with idx.
-func (ds *DataSource) convertToIndexScan(prop *requiredProp, idx *model.IndexInfo) (task task, err error) {
+func (ds *DataSource) convertToIndexScan(prop *requiredProp, path *indexPath) (task task, err error) {
+	idx := path.index
 	is := PhysicalIndexScan{
 		Table:            ds.tableInfo,
 		TableAsName:      ds.TableAsName,
@@ -308,25 +310,10 @@ func (ds *DataSource) convertToIndexScan(prop *requiredProp, idx *model.IndexInf
 		is.Hist = &statsTbl.Indices[idx.ID].Histogram
 	}
 	rowCount := float64(statsTbl.Count)
-	sc := ds.ctx.GetSessionVars().StmtCtx
-	idxCols, colLengths := expression.IndexInfo2Cols(ds.Schema().Columns, idx)
 	is.Ranges = ranger.FullNewRange()
 	eqCount := 0
-	if len(ds.pushedDownConds) > 0 {
-		is.conditions = ds.pushedDownConds
-		if len(idxCols) > 0 {
-			is.Ranges, is.AccessCondition, is.filterCondition, eqCount, err = ranger.DetachCondAndBuildRangeForIndex(ds.ctx, ds.pushedDownConds, idxCols, colLengths)
-			if err != nil {
-				return nil, errors.Trace(err)
-			}
-			rowCount, err = statsTbl.GetRowCountByIndexRanges(sc, is.Index.ID, is.Ranges)
-			if err != nil {
-				return nil, errors.Trace(err)
-			}
-		} else {
-			is.filterCondition = ds.pushedDownConds
-		}
-	}
+	is.AccessCondition, is.Ranges, is.filterCondition, eqCount = path.accessConds, path.ranges, path.indexFilters, path.eqCondCount
+	rowCount = path.countAfterAccess
 	cop := &copTask{indexPlan: is}
 	if !isCoveringIndex(is.Columns, is.Index.Columns, is.Table.PKIsHandle) {
 		// On this way, it's double read case.
@@ -358,12 +345,8 @@ func (ds *DataSource) convertToIndexScan(prop *requiredProp, idx *model.IndexInf
 	// Only use expectedCnt when it's smaller than the count we calculated.
 	// e.g. IndexScan(count1)->After Filter(count2). The `ds.statsAfterSelect.count` is count2. count1 is the one we need to calculate
 	// If expectedCnt and count2 are both zero and we go into the below `if` block, the count1 will be set to zero though it's shouldn't be.
-	if matchProperty && prop.expectedCnt < ds.statsAfterSelect.count {
-		selectivity, err := statsTbl.Selectivity(ds.ctx, is.filterCondition)
-		if err != nil {
-			log.Warnf("An error happened: %v, we have to use the default selectivity", err.Error())
-			selectivity = selectionFactor
-		}
+	if matchProperty && prop.expectedCnt < path.countAfterIndex {
+		selectivity := path.countAfterIndex / path.countAfterAccess
 		rowCount = math.Min(prop.expectedCnt/selectivity, rowCount)
 	}
 	is.stats = ds.stats.scaleByExpectCnt(rowCount)
@@ -379,15 +362,15 @@ func (ds *DataSource) convertToIndexScan(prop *requiredProp, idx *model.IndexInf
 		}
 		cop.keepOrder = true
 		is.KeepOrder = true
-		is.addPushedDownSelection(cop, ds, prop.expectedCnt)
+		is.addPushedDownSelection(cop, ds, prop.expectedCnt, path)
 	} else {
 		expectedCnt := math.MaxFloat64
 		if prop.isEmpty() {
 			expectedCnt = prop.expectedCnt
 		} else {
 			return invalidTask, nil
 		}
-		is.addPushedDownSelection(cop, ds, expectedCnt)
+		is.addPushedDownSelection(cop, ds, expectedCnt, path)
 	}
 	if prop.taskTp == rootTaskType {
 		task = finishCopTask(ds.ctx, task)
@@ -418,29 +401,23 @@ func (is *PhysicalIndexScan) initSchema(id int, idx *model.IndexInfo, isDoubleRe
 	is.SetSchema(expression.NewSchema(indexCols...))
 }
 
-func (is *PhysicalIndexScan) addPushedDownSelection(copTask *copTask, p *DataSource, expectedCnt float64) {
+func (is *PhysicalIndexScan) addPushedDownSelection(copTask *copTask, p *DataSource, expectedCnt float64, path *indexPath) {
 	// Add filter condition to table plan now.
-	if len(is.filterCondition) > 0 {
-		var indexConds, tableConds []expression.Expression
-		if copTask.tablePlan != nil {
-			indexConds, tableConds = splitIndexFilterConditions(is.filterCondition, is.Index.Columns, is.Table)
-		} else {
-			indexConds = is.filterCondition
-		}
-		if indexConds != nil {
-			indexSel := PhysicalSelection{Conditions: indexConds}.init(is.ctx,
-				p.getStatsByFilter(append(is.AccessCondition, indexConds...)).scaleByExpectCnt(expectedCnt))
-			indexSel.SetChildren(is)
-			copTask.indexPlan = indexSel
-			copTask.cst += copTask.count() * cpuFactor
-		}
-		if tableConds != nil {
-			copTask.finishIndexPlan()
-			tableSel := PhysicalSelection{Conditions: tableConds}.init(is.ctx, p.statsAfterSelect.scaleByExpectCnt(expectedCnt))
-			tableSel.SetChildren(copTask.tablePlan)
-			copTask.tablePlan = tableSel
-			copTask.cst += copTask.count() * cpuFactor
-		}
+	indexConds, tableConds := path.indexFilters, path.tableFilters
+	if indexConds != nil {
+		stats := &statsInfo{count: path.countAfterIndex}
+		indexSel := PhysicalSelection{Conditions: indexConds}.init(is.ctx,
+			stats.scaleByExpectCnt(expectedCnt))
+		indexSel.SetChildren(is)
+		copTask.indexPlan = indexSel
+		copTask.cst += copTask.count() * cpuFactor
+	}
+	if tableConds != nil {
+		copTask.finishIndexPlan()
+		tableSel := PhysicalSelection{Conditions: tableConds}.init(is.ctx, p.statsAfterSelect.scaleByExpectCnt(expectedCnt))
+		tableSel.SetChildren(copTask.tablePlan)
+		copTask.tablePlan = tableSel
+		copTask.cst += copTask.count() * cpuFactor
 	}
 }
 
@@ -524,7 +501,7 @@ func (ds *DataSource) forceToTableScan(pk *expression.Column) PhysicalPlan {
 }
 
 // convertToTableScan converts the DataSource to table scan.
-func (ds *DataSource) convertToTableScan(prop *requiredProp) (task task, err error) {
+func (ds *DataSource) convertToTableScan(prop *requiredProp, path *indexPath) (task task, err error) {
 	// It will be handled in convertToIndexScan.
 	if prop.taskTp == copDoubleReadTaskType {
 		return &copTask{cst: math.MaxFloat64}, nil
@@ -537,7 +514,6 @@ func (ds *DataSource) convertToTableScan(prop *requiredProp) (task task, err err
 		DBName:      ds.DBName,
 	}.init(ds.ctx)
 	ts.SetSchema(ds.schema)
-	sc := ds.ctx.GetSessionVars().StmtCtx
 	var pkCol *expression.Column
 	if ts.Table.PKIsHandle {
 		if pkColInfo := ts.Table.GetPkColInfo(); pkColInfo != nil {
@@ -547,29 +523,9 @@ func (ds *DataSource) convertToTableScan(prop *requiredProp) (task task, err err
 			}
 		}
 	}
-	if pkCol != nil {
-		ts.Ranges = ranger.FullIntNewRange(mysql.HasUnsignedFlag(pkCol.RetType.Flag))
-	} else {
-		ts.Ranges = ranger.FullIntNewRange(false)
-	}
-	statsTbl := ds.statisticTable
-	rowCount := float64(statsTbl.Count)
-	if len(ds.pushedDownConds) > 0 {
-		if pkCol != nil {
-			ts.AccessCondition, ts.filterCondition = ranger.DetachCondsForTableRange(ds.ctx, ds.pushedDownConds, pkCol)
-			ts.Ranges, err = ranger.BuildTableRange(ts.AccessCondition, sc, pkCol.RetType)
-			if err != nil {
-				return nil, errors.Trace(err)
-			}
-			// TODO: We can use ds.getStatsByFilter(accessConditions).
-			rowCount, err = statsTbl.GetRowCountByIntColumnRanges(sc, pkCol.ID, ts.Ranges)
-			if err != nil {
-				return nil, errors.Trace(err)
-			}
-		} else {
-			ts.filterCondition = ds.pushedDownConds
-		}
-	}
+	ts.Ranges = path.ranges
+	ts.AccessCondition, ts.filterCondition = path.accessConds, path.tableFilters
+	rowCount := path.countAfterAccess
 	copTask := &copTask{
 		tablePlan:         ts,
 		indexPlanFinished: true,
@@ -580,11 +536,7 @@ func (ds *DataSource) convertToTableScan(prop *requiredProp) (task task, err err
 	// e.g. IndexScan(count1)->After Filter(count2). The `ds.statsAfterSelect.count` is count2. count1 is the one we need to calculate
 	// If expectedCnt and count2 are both zero and we go into the below `if` block, the count1 will be set to zero though it's shouldn't be.
 	if matchProperty && prop.expectedCnt < ds.statsAfterSelect.count {
-		selectivity, err := statsTbl.Selectivity(ds.ctx, ts.filterCondition)
-		if err != nil {
-			log.Warnf("An error happened: %v, we have to use the default selectivity", err.Error())
-			selectivity = selectionFactor
-		}
+		selectivity := ds.statsAfterSelect.count / rowCount
 		rowCount = math.Min(prop.expectedCnt/selectivity, rowCount)
 	}
 	ts.stats = ds.stats.scaleByExpectCnt(rowCount)

diff --git a/plan/plan.go b/plan/plan.go
@@ -161,7 +161,7 @@ type LogicalPlan interface {
 	pushDownTopN(topN *LogicalTopN) LogicalPlan
 
 	// deriveStats derives statistic info between plans.
-	deriveStats() *statsInfo
+	deriveStats() (*statsInfo, error)
 
 	// preparePossibleProperties is only used for join and aggregation. Like group by a,b,c, all permutation of (a,b,c) is
 	// valid, but the ordered indices in leaf plan is limited. So we can get all possible order properties by a pre-walking.