Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

plan: calc access path when doing deriveStats. #6346

Merged
merged 11 commits into from
Apr 28, 2018
7 changes: 5 additions & 2 deletions plan/build_key_info.go
Original file line number Diff line number Diff line change
Expand Up @@ -174,8 +174,11 @@ func (p *LogicalJoin) buildKeyInfo() {
func (ds *DataSource) buildKeyInfo() {
ds.schema.Keys = nil
ds.baseLogicalPlan.buildKeyInfo()
indices := ds.availableIndices.indices
for _, idx := range indices {
for _, path := range ds.possibleAccessPaths {
if path.isTablePath {
continue
}
idx := path.index
if !idx.Unique {
continue
}
Expand Down
19 changes: 11 additions & 8 deletions plan/gen_physical_plans.go
Original file line number Diff line number Diff line change
Expand Up @@ -256,13 +256,15 @@ func (p *LogicalJoin) getIndexJoinByOuterIdx(prop *requiredProp, outerIdx int) [
if !ok {
return nil
}
indices := x.availableIndices.indices
includeTableScan := x.availableIndices.includeTableScan
if includeTableScan && len(innerJoinKeys) == 1 {
pkCol := x.getPKIsHandleCol()
if pkCol != nil && innerJoinKeys[0].Equal(nil, pkCol) {
innerPlan := x.forceToTableScan(pkCol)
return p.constructIndexJoin(prop, innerJoinKeys, outerJoinKeys, outerIdx, innerPlan, nil, nil)
accessPaths := x.possibleAccessPaths
if len(accessPaths) > 0 && accessPaths[0].isTablePath {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

When will len(accessPaths) == 0?

accessPaths = accessPaths[1:]
if len(innerJoinKeys) == 1 {
pkCol := x.getPKIsHandleCol()
if pkCol != nil && innerJoinKeys[0].Equal(nil, pkCol) {
innerPlan := x.forceToTableScan(pkCol)
return p.constructIndexJoin(prop, innerJoinKeys, outerJoinKeys, outerIdx, innerPlan, nil, nil)
}
}
}
var (
Expand All @@ -272,7 +274,8 @@ func (p *LogicalJoin) getIndexJoinByOuterIdx(prop *requiredProp, outerIdx int) [
remainedOfBest []expression.Expression
keyOff2IdxOff []int
)
for _, indexInfo := range indices {
for _, path := range accessPaths {
indexInfo := path.index
ranges, remained, tmpKeyOff2IdxOff := p.buildRangeForIndexJoin(indexInfo, x, innerJoinKeys)
// We choose the index by the number of columns used by the range: the more, the better.
// Notice that there may be the cases like `t1.a=t2.a and b > 2 and b < 1`. So ranges can be nil though the conditions are valid.
Expand Down
14 changes: 7 additions & 7 deletions plan/logical_plan_builder.go
Original file line number Diff line number Diff line change
Expand Up @@ -1671,7 +1671,7 @@ func (b *planBuilder) buildDataSource(tn *ast.TableName) LogicalPlan {
tableInfo := tbl.Meta()
b.visitInfo = appendVisitInfo(b.visitInfo, mysql.SelectPriv, dbName.L, tableInfo.Name.L, "")

availableIdxes, err := getAvailableIndices(tn.IndexHints, tableInfo)
possiblePaths, err := getPossibleAccessPaths(tn.IndexHints, tableInfo)
if err != nil {
b.err = errors.Trace(err)
return nil
Expand All @@ -1685,12 +1685,12 @@ func (b *planBuilder) buildDataSource(tn *ast.TableName) LogicalPlan {
}

ds := DataSource{
DBName: dbName,
tableInfo: tableInfo,
statisticTable: b.getStatsTable(tableInfo),
indexHints: tn.IndexHints,
availableIndices: availableIdxes,
Columns: make([]*model.ColumnInfo, 0, len(columns)),
DBName: dbName,
tableInfo: tableInfo,
statisticTable: b.getStatsTable(tableInfo),
indexHints: tn.IndexHints,
possibleAccessPaths: possiblePaths,
Columns: make([]*model.ColumnInfo, 0, len(columns)),
}.init(b.ctx)

var handleCol *expression.Column
Expand Down
88 changes: 83 additions & 5 deletions plan/logical_plans.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,13 +14,18 @@
package plan

import (
"math"

"github.com/juju/errors"
"github.com/pingcap/tidb/ast"
"github.com/pingcap/tidb/expression"
"github.com/pingcap/tidb/expression/aggregation"
"github.com/pingcap/tidb/model"
"github.com/pingcap/tidb/mysql"
"github.com/pingcap/tidb/statistics"
"github.com/pingcap/tidb/types"
"github.com/pingcap/tidb/util/ranger"
log "github.com/sirupsen/logrus"
)

var (
Expand Down Expand Up @@ -299,13 +304,86 @@ type DataSource struct {

statisticTable *statistics.Table

// availableIndices is used for storing result of availableIndices function.
availableIndices *availableIndices
// possibleAccessPaths stores all the possible access paths for the physical plan, including table scan.
// Please make sure table path is always the first element if we have table path.
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If we check isTablePath in each path, then why do we need to make sure the table path is the first?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For index join, if there is a primary key and it matches the join condition, we choose it directly.
I changed it to use a loop to find the table path first,
so I removed this constraint.

possibleAccessPaths []*accessPath
}

// accessPath describes one way of reading a DataSource: either through one
// index or by accessing the table directly (see isTablePath). Its statistics
// fields are filled in by deriveTablePathStats / deriveIndexPathStats.
type accessPath struct {
// index is the index read by this path. Callers in this package skip table
// paths before using it; presumably it is nil for a table path — confirm.
index *model.IndexInfo
// ranges are the scan ranges for this path. They start as a full range and
// are replaced by the ranges built from the pushed-down conditions when
// such ranges can be built.
ranges []*ranger.Range
// countAfterAccess is the row count after we apply range seek and before we use other filter to filter data.
countAfterAccess float64
// countAfterIndex is the row count after we apply filters on index and before we apply the table filters.
countAfterIndex float64
// accessConds are the conditions detached from the pushed-down conditions
// and used to build ranges.
accessConds []expression.Expression
// eqCondCount is the count returned by ranger.DetachCondAndBuildRangeForIndex;
// presumably the number of equality conditions in accessConds — confirm.
eqCondCount int
// indexFilters are the filters produced by splitIndexFilterConditions for
// the index side; their selectivity is used to estimate countAfterIndex.
indexFilters []expression.Expression
// tableFilters are the remaining filters. For a table path they start as
// all pushed-down conditions and are narrowed once access conditions are
// detached.
tableFilters []expression.Expression
// isTablePath indicates whether this path is table path.
isTablePath bool
// forced means this path is generated by `use/force index()`.
forced bool
}

func (ds *DataSource) deriveTablePathStats(path *accessPath) error {
var err error
sc := ds.ctx.GetSessionVars().StmtCtx
path.countAfterAccess = float64(ds.statisticTable.Count)
path.tableFilters = ds.pushedDownConds
var pkCol *expression.Column
if ds.tableInfo.PKIsHandle {
if pkColInfo := ds.tableInfo.GetPkColInfo(); pkColInfo != nil {
pkCol = expression.ColInfo2Col(ds.schema.Columns, pkColInfo)
}
}
if pkCol == nil {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We'd better add some test cases to cover pkCol == nil and pkCol != nil.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We'd better add some test cases to cover pkCol == nil and pkCol != nil.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The status of pkCol is covered by the executor's unit tests.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

But why could all the test cases pass when lines 336~340 were missing before?
We may need a test to check this case.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Let me check it first.

path.ranges = ranger.FullIntRange(false)
return nil
}
path.ranges = ranger.FullIntRange(mysql.HasUnsignedFlag(pkCol.RetType.Flag))
if len(ds.pushedDownConds) == 0 {
return nil
}
path.accessConds, path.tableFilters = ranger.DetachCondsForTableRange(ds.ctx, ds.pushedDownConds, pkCol)
path.ranges, err = ranger.BuildTableRange(path.accessConds, sc, pkCol.RetType)
if err != nil {
return errors.Trace(err)
}
path.countAfterAccess, err = ds.statisticTable.GetRowCountByIntColumnRanges(sc, pkCol.ID, path.ranges)
return errors.Trace(err)
}

type availableIndices struct {
indices []*model.IndexInfo
includeTableScan bool
func (ds *DataSource) deriveIndexPathStats(path *accessPath) error {
var err error
sc := ds.ctx.GetSessionVars().StmtCtx
path.ranges = ranger.FullRange()
path.countAfterAccess = float64(ds.statisticTable.Count)
idxCols, lengths := expression.IndexInfo2Cols(ds.schema.Columns, path.index)
if len(idxCols) != 0 {
path.ranges, path.accessConds, path.indexFilters, path.eqCondCount, err = ranger.DetachCondAndBuildRangeForIndex(ds.ctx, ds.pushedDownConds, idxCols, lengths)
if err != nil {
return errors.Trace(err)
}
path.countAfterAccess, err = ds.statisticTable.GetRowCountByIndexRanges(sc, path.index.ID, path.ranges)
if err != nil {
return errors.Trace(err)
}
path.indexFilters, path.tableFilters = splitIndexFilterConditions(path.indexFilters, path.index.Columns, ds.tableInfo)
} else {
path.indexFilters, path.tableFilters = splitIndexFilterConditions(ds.pushedDownConds, path.index.Columns, ds.tableInfo)
}
path.countAfterIndex = path.countAfterAccess
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this a duplicate of line 364?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Removed one.

if path.indexFilters != nil {
selectivity, err := ds.statisticTable.Selectivity(ds.ctx, path.indexFilters)
if err != nil {
log.Warnf("An error happened: %v, we have to use the default selectivity", err.Error())
selectivity = selectionFactor
}
path.countAfterIndex = math.Max(path.countAfterAccess*selectivity, ds.statsAfterSelect.count)
}
return nil
}

func (ds *DataSource) getPKIsHandleCol() *expression.Column {
Expand Down
5 changes: 4 additions & 1 deletion plan/optimizer.go
Original file line number Diff line number Diff line change
Expand Up @@ -147,7 +147,10 @@ func logicalOptimize(flag uint64, logic LogicalPlan) (LogicalPlan, error) {

func physicalOptimize(logic LogicalPlan) (PhysicalPlan, error) {
logic.preparePossibleProperties()
logic.deriveStats()
_, err := logic.deriveStats()
if err != nil {
return nil, errors.Trace(err)
}
t, err := logic.findBestTask(&requiredProp{taskTp: rootTaskType, expectedCnt: math.MaxFloat64})
if err != nil {
return nil, errors.Trace(err)
Expand Down
Loading