Skip to content

Commit

Permalink
plan: calculate table access paths in DataSource.deriveStats (#6346)
Browse files Browse the repository at this point in the history
  • Loading branch information
winoros authored and zz-jason committed Apr 28, 2018
1 parent 8b57ce2 commit 50426a9
Show file tree
Hide file tree
Showing 10 changed files with 303 additions and 212 deletions.
7 changes: 5 additions & 2 deletions plan/build_key_info.go
Original file line number Diff line number Diff line change
Expand Up @@ -174,8 +174,11 @@ func (p *LogicalJoin) buildKeyInfo() {
func (ds *DataSource) buildKeyInfo() {
ds.schema.Keys = nil
ds.baseLogicalPlan.buildKeyInfo()
indices := ds.availableIndices.indices
for _, idx := range indices {
for _, path := range ds.possibleAccessPaths {
if path.isTablePath {
continue
}
idx := path.index
if !idx.Unique {
continue
}
Expand Down
17 changes: 13 additions & 4 deletions plan/gen_physical_plans.go
Original file line number Diff line number Diff line change
Expand Up @@ -256,9 +256,14 @@ func (p *LogicalJoin) getIndexJoinByOuterIdx(prop *requiredProp, outerIdx int) [
if !ok {
return nil
}
indices := x.availableIndices.indices
includeTableScan := x.availableIndices.includeTableScan
if includeTableScan && len(innerJoinKeys) == 1 {
var tblPath *accessPath
for _, path := range x.possibleAccessPaths {
if path.isTablePath {
tblPath = path
break
}
}
if tblPath != nil && len(innerJoinKeys) == 1 {
pkCol := x.getPKIsHandleCol()
if pkCol != nil && innerJoinKeys[0].Equal(nil, pkCol) {
innerPlan := x.forceToTableScan(pkCol)
Expand All @@ -272,7 +277,11 @@ func (p *LogicalJoin) getIndexJoinByOuterIdx(prop *requiredProp, outerIdx int) [
remainedOfBest []expression.Expression
keyOff2IdxOff []int
)
for _, indexInfo := range indices {
for _, path := range x.possibleAccessPaths {
if path.isTablePath {
continue
}
indexInfo := path.index
ranges, remained, tmpKeyOff2IdxOff := p.buildRangeForIndexJoin(indexInfo, x, innerJoinKeys)
// We choose the index by the number of columns used by the range: the more, the better.
// Notice that there may be the cases like `t1.a=t2.a and b > 2 and b < 1`. So ranges can be nil though the conditions are valid.
Expand Down
14 changes: 7 additions & 7 deletions plan/logical_plan_builder.go
Original file line number Diff line number Diff line change
Expand Up @@ -1671,7 +1671,7 @@ func (b *planBuilder) buildDataSource(tn *ast.TableName) LogicalPlan {
tableInfo := tbl.Meta()
b.visitInfo = appendVisitInfo(b.visitInfo, mysql.SelectPriv, dbName.L, tableInfo.Name.L, "")

availableIdxes, err := getAvailableIndices(tn.IndexHints, tableInfo)
possiblePaths, err := getPossibleAccessPaths(tn.IndexHints, tableInfo)
if err != nil {
b.err = errors.Trace(err)
return nil
Expand All @@ -1685,12 +1685,12 @@ func (b *planBuilder) buildDataSource(tn *ast.TableName) LogicalPlan {
}

ds := DataSource{
DBName: dbName,
tableInfo: tableInfo,
statisticTable: b.getStatsTable(tableInfo),
indexHints: tn.IndexHints,
availableIndices: availableIdxes,
Columns: make([]*model.ColumnInfo, 0, len(columns)),
DBName: dbName,
tableInfo: tableInfo,
statisticTable: b.getStatsTable(tableInfo),
indexHints: tn.IndexHints,
possibleAccessPaths: possiblePaths,
Columns: make([]*model.ColumnInfo, 0, len(columns)),
}.init(b.ctx)

var handleCol *expression.Column
Expand Down
87 changes: 82 additions & 5 deletions plan/logical_plans.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,13 +14,18 @@
package plan

import (
"math"

"github.com/juju/errors"
"github.com/pingcap/tidb/ast"
"github.com/pingcap/tidb/expression"
"github.com/pingcap/tidb/expression/aggregation"
"github.com/pingcap/tidb/model"
"github.com/pingcap/tidb/mysql"
"github.com/pingcap/tidb/statistics"
"github.com/pingcap/tidb/types"
"github.com/pingcap/tidb/util/ranger"
log "github.com/sirupsen/logrus"
)

var (
Expand Down Expand Up @@ -299,13 +304,85 @@ type DataSource struct {

statisticTable *statistics.Table

// availableIndices is used for storing result of availableIndices function.
availableIndices *availableIndices
// possibleAccessPaths stores all the possible access paths for the physical plan, including the table scan.
possibleAccessPaths []*accessPath
}

// accessPath describes one way of reading a DataSource: either through a
// single index or by accessing the table directly (isTablePath == true).
// The range and row-count fields are filled in by
// DataSource.deriveTablePathStats / DataSource.deriveIndexPathStats.
type accessPath struct {
// index is the index used by an index path. Code that walks the paths
// skips this field when isTablePath is set.
index *model.IndexInfo
// ranges are the ranges to scan. They start as a full range and are
// narrowed using accessConds when such conditions can be extracted.
ranges []*ranger.Range
// countAfterAccess is the row count after we apply range seek and before we use other filter to filter data.
countAfterAccess float64
// countAfterIndex is the row count after we apply filters on index and before we apply the table filters.
countAfterIndex float64
// accessConds are the pushed-down conditions that were detached to build ranges.
accessConds []expression.Expression
// eqCondCount is reported by ranger.DetachCondAndBuildRangeForIndex.
// NOTE(review): presumably the number of equality conditions used in the range — confirm.
eqCondCount int
// indexFilters and tableFilters are the two halves produced by
// splitIndexFilterConditions for index paths. For table paths only
// tableFilters is set (the conditions not consumed as accessConds).
indexFilters []expression.Expression
tableFilters []expression.Expression
// isTablePath indicates whether this path is table path.
isTablePath bool
// forced means this path is generated by `use/force index()`.
forced bool
}

// deriveTablePathStats fills in the ranges, access conditions, table filters
// and estimated row count of a table access path.
//
// The path starts out as a full scan: every pushed-down condition is a table
// filter and the row count is the table's total count from statistics. Only
// when the table has PKIsHandle set and exposes a primary-key column can the
// pushed-down conditions be turned into ranges on that column.
//
// It returns a traced error if building the ranges or estimating the row
// count fails.
func (ds *DataSource) deriveTablePathStats(path *accessPath) error {
	stmtCtx := ds.ctx.GetSessionVars().StmtCtx

	// Defaults for a plain full table scan.
	path.countAfterAccess = float64(ds.statisticTable.Count)
	path.tableFilters = ds.pushedDownConds

	// Look up the handle column, if the primary key serves as the handle.
	var handleCol *expression.Column
	if ds.tableInfo.PKIsHandle {
		if info := ds.tableInfo.GetPkColInfo(); info != nil {
			handleCol = expression.ColInfo2Col(ds.schema.Columns, info)
		}
	}
	if handleCol == nil {
		// No usable handle column: nothing can narrow the range.
		path.ranges = ranger.FullIntRange(false)
		return nil
	}

	isUnsigned := mysql.HasUnsignedFlag(handleCol.RetType.Flag)
	path.ranges = ranger.FullIntRange(isUnsigned)
	if len(ds.pushedDownConds) == 0 {
		return nil
	}

	// Split the conditions into those usable for the range and the rest.
	access, rest := ranger.DetachCondsForTableRange(ds.ctx, ds.pushedDownConds, handleCol)
	path.accessConds, path.tableFilters = access, rest

	ranges, err := ranger.BuildTableRange(access, stmtCtx, handleCol.RetType)
	path.ranges = ranges
	if err != nil {
		return errors.Trace(err)
	}

	rowCount, err := ds.statisticTable.GetRowCountByIntColumnRanges(stmtCtx, handleCol.ID, ranges)
	path.countAfterAccess = rowCount
	return errors.Trace(err)
}

type availableIndices struct {
indices []*model.IndexInfo
includeTableScan bool
// deriveIndexPathStats fills in the ranges, access conditions, index/table
// filters and estimated row counts of an index access path.
//
// The path starts out as a full index scan whose row count is the table's
// total count from statistics. If the index maps to at least one column of
// the schema, the pushed-down conditions are used to build ranges and the
// row count is re-estimated from those ranges. The remaining conditions are
// then split into index filters and table filters, and countAfterIndex is
// derived from the selectivity of the index filters.
//
// It returns a traced error if building the ranges or estimating the row
// count fails; a failure while computing selectivity is only logged and the
// default selectionFactor is used instead.
func (ds *DataSource) deriveIndexPathStats(path *accessPath) error {
	stmtCtx := ds.ctx.GetSessionVars().StmtCtx
	path.ranges = ranger.FullRange()
	path.countAfterAccess = float64(ds.statisticTable.Count)

	// Conditions handed to splitIndexFilterConditions: all pushed-down
	// conditions by default, or only what is left after range building.
	condsToSplit := ds.pushedDownConds
	if cols, lens := expression.IndexInfo2Cols(ds.schema.Columns, path.index); len(cols) > 0 {
		var err error
		path.ranges, path.accessConds, path.indexFilters, path.eqCondCount, err = ranger.DetachCondAndBuildRangeForIndex(ds.ctx, ds.pushedDownConds, cols, lens)
		if err != nil {
			return errors.Trace(err)
		}
		path.countAfterAccess, err = ds.statisticTable.GetRowCountByIndexRanges(stmtCtx, path.index.ID, path.ranges)
		if err != nil {
			return errors.Trace(err)
		}
		condsToSplit = path.indexFilters
	}
	path.indexFilters, path.tableFilters = splitIndexFilterConditions(condsToSplit, path.index.Columns, ds.tableInfo)

	path.countAfterIndex = path.countAfterAccess
	if path.indexFilters == nil {
		return nil
	}

	ratio, err := ds.statisticTable.Selectivity(ds.ctx, path.indexFilters)
	if err != nil {
		log.Warnf("An error happened: %v, we have to use the default selectivity", err.Error())
		ratio = selectionFactor
	}
	// Never estimate fewer rows than the data source's post-selection count.
	path.countAfterIndex = math.Max(path.countAfterAccess*ratio, ds.statsAfterSelect.count)
	return nil
}

func (ds *DataSource) getPKIsHandleCol() *expression.Column {
Expand Down
5 changes: 4 additions & 1 deletion plan/optimizer.go
Original file line number Diff line number Diff line change
Expand Up @@ -147,7 +147,10 @@ func logicalOptimize(flag uint64, logic LogicalPlan) (LogicalPlan, error) {

func physicalOptimize(logic LogicalPlan) (PhysicalPlan, error) {
logic.preparePossibleProperties()
logic.deriveStats()
_, err := logic.deriveStats()
if err != nil {
return nil, errors.Trace(err)
}
t, err := logic.findBestTask(&requiredProp{taskTp: rootTaskType, expectedCnt: math.MaxFloat64})
if err != nil {
return nil, errors.Trace(err)
Expand Down
Loading

0 comments on commit 50426a9

Please sign in to comment.