Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

plan: calc access path when doing deriveStats. #6346

Merged
merged 11 commits into from
Apr 28, 2018
7 changes: 5 additions & 2 deletions plan/build_key_info.go
Original file line number Diff line number Diff line change
Expand Up @@ -174,8 +174,11 @@ func (p *LogicalJoin) buildKeyInfo() {
func (ds *DataSource) buildKeyInfo() {
ds.schema.Keys = nil
ds.baseLogicalPlan.buildKeyInfo()
indices := ds.availableIndices.indices
for _, idx := range indices {
for _, path := range ds.possibleAccessPaths {
if path.isTablePath {
continue
}
idx := path.index
if !idx.Unique {
continue
}
Expand Down
17 changes: 13 additions & 4 deletions plan/gen_physical_plans.go
Original file line number Diff line number Diff line change
Expand Up @@ -256,9 +256,14 @@ func (p *LogicalJoin) getIndexJoinByOuterIdx(prop *requiredProp, outerIdx int) [
if !ok {
return nil
}
indices := x.availableIndices.indices
includeTableScan := x.availableIndices.includeTableScan
if includeTableScan && len(innerJoinKeys) == 1 {
var tblPath *accessPath
for _, path := range x.possibleAccessPaths {
if path.isTablePath {
tblPath = path
break
}
}
if tblPath != nil && len(innerJoinKeys) == 1 {
pkCol := x.getPKIsHandleCol()
if pkCol != nil && innerJoinKeys[0].Equal(nil, pkCol) {
innerPlan := x.forceToTableScan(pkCol)
Expand All @@ -272,7 +277,11 @@ func (p *LogicalJoin) getIndexJoinByOuterIdx(prop *requiredProp, outerIdx int) [
remainedOfBest []expression.Expression
keyOff2IdxOff []int
)
for _, indexInfo := range indices {
for _, path := range x.possibleAccessPaths {
if path.isTablePath {
continue
}
indexInfo := path.index
ranges, remained, tmpKeyOff2IdxOff := p.buildRangeForIndexJoin(indexInfo, x, innerJoinKeys)
// We choose the index by the number of used columns of the range, the much the better.
// Notice that there may be the cases like `t1.a=t2.a and b > 2 and b < 1`. So ranges can be nil though the conditions are valid.
Expand Down
14 changes: 7 additions & 7 deletions plan/logical_plan_builder.go
Original file line number Diff line number Diff line change
Expand Up @@ -1671,7 +1671,7 @@ func (b *planBuilder) buildDataSource(tn *ast.TableName) LogicalPlan {
tableInfo := tbl.Meta()
b.visitInfo = appendVisitInfo(b.visitInfo, mysql.SelectPriv, dbName.L, tableInfo.Name.L, "")

availableIdxes, err := getAvailableIndices(tn.IndexHints, tableInfo)
possiblePaths, err := getPossibleAccessPaths(tn.IndexHints, tableInfo)
if err != nil {
b.err = errors.Trace(err)
return nil
Expand All @@ -1685,12 +1685,12 @@ func (b *planBuilder) buildDataSource(tn *ast.TableName) LogicalPlan {
}

ds := DataSource{
DBName: dbName,
tableInfo: tableInfo,
statisticTable: b.getStatsTable(tableInfo),
indexHints: tn.IndexHints,
availableIndices: availableIdxes,
Columns: make([]*model.ColumnInfo, 0, len(columns)),
DBName: dbName,
tableInfo: tableInfo,
statisticTable: b.getStatsTable(tableInfo),
indexHints: tn.IndexHints,
possibleAccessPaths: possiblePaths,
Columns: make([]*model.ColumnInfo, 0, len(columns)),
}.init(b.ctx)

var handleCol *expression.Column
Expand Down
87 changes: 82 additions & 5 deletions plan/logical_plans.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,13 +14,18 @@
package plan

import (
"math"

"github.com/juju/errors"
"github.com/pingcap/tidb/ast"
"github.com/pingcap/tidb/expression"
"github.com/pingcap/tidb/expression/aggregation"
"github.com/pingcap/tidb/model"
"github.com/pingcap/tidb/mysql"
"github.com/pingcap/tidb/statistics"
"github.com/pingcap/tidb/types"
"github.com/pingcap/tidb/util/ranger"
log "github.com/sirupsen/logrus"
)

var (
Expand Down Expand Up @@ -299,13 +304,85 @@ type DataSource struct {

statisticTable *statistics.Table

// availableIndices is used for storing result of availableIndices function.
availableIndices *availableIndices
// possibleAccessPaths stores all the possible access path for physical plan, including table scan.
possibleAccessPaths []*accessPath
}

// accessPath tells how we access one index or just access table.
type accessPath struct {
index *model.IndexInfo
ranges []*ranger.Range
// countAfterAccess is the row count after we apply range seek and before we use other filter to filter data.
countAfterAccess float64
// countAfterIndex is the row count after we apply filters on index and before we apply the table filters.
countAfterIndex float64
accessConds []expression.Expression
eqCondCount int
indexFilters []expression.Expression
tableFilters []expression.Expression
// isTablePath indicates whether this path is table path.
isTablePath bool
// forced means this path is generated by `use/force index()`.
forced bool
}

func (ds *DataSource) deriveTablePathStats(path *accessPath) error {
var err error
sc := ds.ctx.GetSessionVars().StmtCtx
path.countAfterAccess = float64(ds.statisticTable.Count)
path.tableFilters = ds.pushedDownConds
var pkCol *expression.Column
if ds.tableInfo.PKIsHandle {
if pkColInfo := ds.tableInfo.GetPkColInfo(); pkColInfo != nil {
pkCol = expression.ColInfo2Col(ds.schema.Columns, pkColInfo)
}
}
if pkCol == nil {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We'better add some test cases to cover pkCol == nil and pkCol !=nil.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

the status of pkCol is covered executor's unit test

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

But why all test cases can be passed when line 336~340 was missed before,
we may need some test for checking this case.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

let me check it first.

path.ranges = ranger.FullIntRange(false)
return nil
}
path.ranges = ranger.FullIntRange(mysql.HasUnsignedFlag(pkCol.RetType.Flag))
if len(ds.pushedDownConds) == 0 {
return nil
}
path.accessConds, path.tableFilters = ranger.DetachCondsForTableRange(ds.ctx, ds.pushedDownConds, pkCol)
path.ranges, err = ranger.BuildTableRange(path.accessConds, sc, pkCol.RetType)
if err != nil {
return errors.Trace(err)
}
path.countAfterAccess, err = ds.statisticTable.GetRowCountByIntColumnRanges(sc, pkCol.ID, path.ranges)
return errors.Trace(err)
}

type availableIndices struct {
indices []*model.IndexInfo
includeTableScan bool
func (ds *DataSource) deriveIndexPathStats(path *accessPath) error {
var err error
sc := ds.ctx.GetSessionVars().StmtCtx
path.ranges = ranger.FullRange()
path.countAfterAccess = float64(ds.statisticTable.Count)
idxCols, lengths := expression.IndexInfo2Cols(ds.schema.Columns, path.index)
if len(idxCols) != 0 {
path.ranges, path.accessConds, path.indexFilters, path.eqCondCount, err = ranger.DetachCondAndBuildRangeForIndex(ds.ctx, ds.pushedDownConds, idxCols, lengths)
if err != nil {
return errors.Trace(err)
}
path.countAfterAccess, err = ds.statisticTable.GetRowCountByIndexRanges(sc, path.index.ID, path.ranges)
if err != nil {
return errors.Trace(err)
}
path.indexFilters, path.tableFilters = splitIndexFilterConditions(path.indexFilters, path.index.Columns, ds.tableInfo)
} else {
path.indexFilters, path.tableFilters = splitIndexFilterConditions(ds.pushedDownConds, path.index.Columns, ds.tableInfo)
}
path.countAfterIndex = path.countAfterAccess
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Duplicated with line 364?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

removed one.

if path.indexFilters != nil {
selectivity, err := ds.statisticTable.Selectivity(ds.ctx, path.indexFilters)
if err != nil {
log.Warnf("An error happened: %v, we have to use the default selectivity", err.Error())
selectivity = selectionFactor
}
path.countAfterIndex = math.Max(path.countAfterAccess*selectivity, ds.statsAfterSelect.count)
}
return nil
}

func (ds *DataSource) getPKIsHandleCol() *expression.Column {
Expand Down
5 changes: 4 additions & 1 deletion plan/optimizer.go
Original file line number Diff line number Diff line change
Expand Up @@ -147,7 +147,10 @@ func logicalOptimize(flag uint64, logic LogicalPlan) (LogicalPlan, error) {

func physicalOptimize(logic LogicalPlan) (PhysicalPlan, error) {
logic.preparePossibleProperties()
logic.deriveStats()
_, err := logic.deriveStats()
if err != nil {
return nil, errors.Trace(err)
}
t, err := logic.findBestTask(&requiredProp{taskTp: rootTaskType, expectedCnt: math.MaxFloat64})
if err != nil {
return nil, errors.Trace(err)
Expand Down
Loading