Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

plan: calc access path when doing deriveStats. #6346

Merged
merged 11 commits into from
Apr 28, 2018
7 changes: 5 additions & 2 deletions plan/build_key_info.go
Original file line number Diff line number Diff line change
Expand Up @@ -174,8 +174,11 @@ func (p *LogicalJoin) buildKeyInfo() {
func (ds *DataSource) buildKeyInfo() {
ds.schema.Keys = nil
ds.baseLogicalPlan.buildKeyInfo()
indices := ds.availableIndices.indices
for _, idx := range indices {
for _, path := range ds.possibleIndexPaths {
if path.isRowID {
continue
}
idx := path.index
if !idx.Unique {
continue
}
Expand Down
9 changes: 5 additions & 4 deletions plan/gen_physical_plans.go
Original file line number Diff line number Diff line change
Expand Up @@ -256,9 +256,9 @@ func (p *LogicalJoin) getIndexJoinByOuterIdx(prop *requiredProp, outerIdx int) [
if !ok {
return nil
}
indices := x.availableIndices.indices
includeTableScan := x.availableIndices.includeTableScan
if includeTableScan && len(innerJoinKeys) == 1 {
indexPaths := x.possibleIndexPaths
if len(x.possibleIndexPaths) > 0 && x.possibleIndexPaths[0].isRowID {
indexPaths = indexPaths[1:]
pkCol := x.getPKIsHandleCol()
if pkCol != nil && innerJoinKeys[0].Equal(nil, pkCol) {
innerPlan := x.forceToTableScan(pkCol)
Expand All @@ -272,7 +272,8 @@ func (p *LogicalJoin) getIndexJoinByOuterIdx(prop *requiredProp, outerIdx int) [
remainedOfBest []expression.Expression
keyOff2IdxOff []int
)
for _, indexInfo := range indices {
for _, path := range indexPaths {
indexInfo := path.index
ranges, remained, tmpKeyOff2IdxOff := p.buildRangeForIndexJoin(indexInfo, x, innerJoinKeys)
// We choose the index by the number of used columns of the range; the more the better.
// Notice that there may be cases like `t1.a=t2.a and b > 2 and b < 1`, so ranges can be nil even though the conditions are valid.
Expand Down
14 changes: 7 additions & 7 deletions plan/logical_plan_builder.go
Original file line number Diff line number Diff line change
Expand Up @@ -1671,7 +1671,7 @@ func (b *planBuilder) buildDataSource(tn *ast.TableName) LogicalPlan {
tableInfo := tbl.Meta()
b.visitInfo = appendVisitInfo(b.visitInfo, mysql.SelectPriv, dbName.L, tableInfo.Name.L, "")

availableIdxes, err := getAvailableIndices(tn.IndexHints, tableInfo)
possiblePaths, err := getPossibleIndexPaths(tn.IndexHints, tableInfo)
if err != nil {
b.err = errors.Trace(err)
return nil
Expand All @@ -1685,12 +1685,12 @@ func (b *planBuilder) buildDataSource(tn *ast.TableName) LogicalPlan {
}

ds := DataSource{
DBName: dbName,
tableInfo: tableInfo,
statisticTable: b.getStatsTable(tableInfo),
indexHints: tn.IndexHints,
availableIndices: availableIdxes,
Columns: make([]*model.ColumnInfo, 0, len(columns)),
DBName: dbName,
tableInfo: tableInfo,
statisticTable: b.getStatsTable(tableInfo),
indexHints: tn.IndexHints,
possibleIndexPaths: possiblePaths,
Columns: make([]*model.ColumnInfo, 0, len(columns)),
}.init(b.ctx)

var handleCol *expression.Column
Expand Down
21 changes: 16 additions & 5 deletions plan/logical_plans.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ import (
"github.com/pingcap/tidb/mysql"
"github.com/pingcap/tidb/statistics"
"github.com/pingcap/tidb/types"
"github.com/pingcap/tidb/util/ranger"
)

var (
Expand Down Expand Up @@ -299,13 +300,23 @@ type DataSource struct {

statisticTable *statistics.Table

// availableIndices is used for storing result of availableIndices function.
availableIndices *availableIndices
// possibleIndexPaths stores all the possible index paths for physical plan, including table scan.
// Please make sure table scan is always the first element.
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

make sure

possibleIndexPaths []*indexPath
}

type availableIndices struct {
indices []*model.IndexInfo
includeTableScan bool
type indexPath struct {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please add comments for this type.

index *model.IndexInfo
ranges []*ranger.NewRange
countAfterAccess float64
countAfterIndex float64
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Add comments about the two counts.

accessConds []expression.Expression
eqCondCount int
indexFilters []expression.Expression
tableFilters []expression.Expression
filterUnmatched bool
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

not used?

isRowID bool
forced bool
}

func (ds *DataSource) getPKIsHandleCol() *expression.Column {
Expand Down
5 changes: 4 additions & 1 deletion plan/optimizer.go
Original file line number Diff line number Diff line change
Expand Up @@ -147,7 +147,10 @@ func logicalOptimize(flag uint64, logic LogicalPlan) (LogicalPlan, error) {

func physicalOptimize(logic LogicalPlan) (PhysicalPlan, error) {
logic.preparePossibleProperties()
logic.deriveStats()
_, err := logic.deriveStats()
if err != nil {
return nil, errors.Trace(err)
}
t, err := logic.findBestTask(&requiredProp{taskTp: rootTaskType, expectedCnt: math.MaxFloat64})
if err != nil {
return nil, errors.Trace(err)
Expand Down
146 changes: 49 additions & 97 deletions plan/physical_plan_builder.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,6 @@ import (
"github.com/pingcap/tidb/mysql"
"github.com/pingcap/tidb/types"
"github.com/pingcap/tidb/util/ranger"
log "github.com/sirupsen/logrus"
)

const (
Expand Down Expand Up @@ -202,30 +201,30 @@ func (ds *DataSource) findBestTask(prop *requiredProp) (task, error) {
return t, nil
}

indices := ds.availableIndices.indices
includeTableScan := ds.availableIndices.includeTableScan
t = invalidTask
if includeTableScan {
t, err = ds.convertToTableScan(prop)
if err != nil {
return nil, errors.Trace(err)
}
}
if !includeTableScan || len(ds.pushedDownConds) > 0 || len(prop.cols) > 0 {
for i, idx := range indices {
// TODO: We can also check if the prop matches the index columns.
if !ds.relevantIndices[i] && len(prop.cols) == 0 {
continue
}
idxTask, err := ds.convertToIndexScan(prop, idx)

for _, path := range ds.possibleIndexPaths {
if path.isRowID {
tblTask, err := ds.convertToTableScan(prop, path)
if err != nil {
return nil, errors.Trace(err)
}
if idxTask.cost() < t.cost() {
t = idxTask
if tblTask.cost() < t.cost() {
t = tblTask
}
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

How about adding a `continue` in this code block?

} else {
if len(path.accessConds) > 0 || len(prop.cols) > 0 || path.forced {
idxTask, err := ds.convertToIndexScan(prop, path)
if err != nil {
return nil, errors.Trace(err)
}
if idxTask.cost() < t.cost() {
t = idxTask
}
}
}
}

ds.storeTask(prop, t)
return t, nil
}
Expand Down Expand Up @@ -276,7 +275,9 @@ func (ds *DataSource) forceToIndexScan(idx *model.IndexInfo, remainedConds []exp
cop.tablePlan = ts
}
is.initSchema(ds.id, idx, cop.tablePlan != nil)
is.addPushedDownSelection(cop, ds, math.MaxFloat64)
indexConds, tblConds := splitIndexFilterConditions(remainedConds, idx.Columns, ds.tableInfo)
path := &indexPath{indexFilters: indexConds, tableFilters: tblConds, countAfterIndex: math.MaxFloat64}
is.addPushedDownSelection(cop, ds, math.MaxFloat64, path)
t := finishCopTask(ds.ctx, cop)
return t.plan()
}
Expand All @@ -294,7 +295,8 @@ func (ts *PhysicalTableScan) appendExtraHandleCol(ds *DataSource) {
}

// convertToIndexScan converts the DataSource to index scan with idx.
func (ds *DataSource) convertToIndexScan(prop *requiredProp, idx *model.IndexInfo) (task task, err error) {
func (ds *DataSource) convertToIndexScan(prop *requiredProp, path *indexPath) (task task, err error) {
idx := path.index
is := PhysicalIndexScan{
Table: ds.tableInfo,
TableAsName: ds.TableAsName,
Expand All @@ -308,25 +310,10 @@ func (ds *DataSource) convertToIndexScan(prop *requiredProp, idx *model.IndexInf
is.Hist = &statsTbl.Indices[idx.ID].Histogram
}
rowCount := float64(statsTbl.Count)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

can be removed

sc := ds.ctx.GetSessionVars().StmtCtx
idxCols, colLengths := expression.IndexInfo2Cols(ds.Schema().Columns, idx)
is.Ranges = ranger.FullNewRange()
eqCount := 0
if len(ds.pushedDownConds) > 0 {
is.conditions = ds.pushedDownConds
if len(idxCols) > 0 {
is.Ranges, is.AccessCondition, is.filterCondition, eqCount, err = ranger.DetachCondAndBuildRangeForIndex(ds.ctx, ds.pushedDownConds, idxCols, colLengths)
if err != nil {
return nil, errors.Trace(err)
}
rowCount, err = statsTbl.GetRowCountByIndexRanges(sc, is.Index.ID, is.Ranges)
if err != nil {
return nil, errors.Trace(err)
}
} else {
is.filterCondition = ds.pushedDownConds
}
}
is.AccessCondition, is.Ranges, is.filterCondition, eqCount = path.accessConds, path.ranges, path.indexFilters, path.eqCondCount
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This line is too long; can we split it into multiple lines?

rowCount = path.countAfterAccess
cop := &copTask{indexPlan: is}
if !isCoveringIndex(is.Columns, is.Index.Columns, is.Table.PKIsHandle) {
// In this case, it's a double read.
Expand Down Expand Up @@ -358,12 +345,8 @@ func (ds *DataSource) convertToIndexScan(prop *requiredProp, idx *model.IndexInf
// Only use expectedCnt when it's smaller than the count we calculated.
// e.g. IndexScan(count1)->After Filter(count2). The `ds.statsAfterSelect.count` is count2. count1 is the one we need to calculate
// If expectedCnt and count2 are both zero and we go into the below `if` block, the count1 will be set to zero though it shouldn't be.
if matchProperty && prop.expectedCnt < ds.statsAfterSelect.count {
selectivity, err := statsTbl.Selectivity(ds.ctx, is.filterCondition)
if err != nil {
log.Warnf("An error happened: %v, we have to use the default selectivity", err.Error())
selectivity = selectionFactor
}
if matchProperty && prop.expectedCnt < path.countAfterIndex {
selectivity := path.countAfterIndex / path.countAfterAccess
rowCount = math.Min(prop.expectedCnt/selectivity, rowCount)
}
is.stats = ds.stats.scaleByExpectCnt(rowCount)
Expand All @@ -379,15 +362,15 @@ func (ds *DataSource) convertToIndexScan(prop *requiredProp, idx *model.IndexInf
}
cop.keepOrder = true
is.KeepOrder = true
is.addPushedDownSelection(cop, ds, prop.expectedCnt)
is.addPushedDownSelection(cop, ds, prop.expectedCnt, path)
} else {
expectedCnt := math.MaxFloat64
if prop.isEmpty() {
expectedCnt = prop.expectedCnt
} else {
return invalidTask, nil
}
is.addPushedDownSelection(cop, ds, expectedCnt)
is.addPushedDownSelection(cop, ds, expectedCnt, path)
}
if prop.taskTp == rootTaskType {
task = finishCopTask(ds.ctx, task)
Expand Down Expand Up @@ -418,29 +401,23 @@ func (is *PhysicalIndexScan) initSchema(id int, idx *model.IndexInfo, isDoubleRe
is.SetSchema(expression.NewSchema(indexCols...))
}

func (is *PhysicalIndexScan) addPushedDownSelection(copTask *copTask, p *DataSource, expectedCnt float64) {
func (is *PhysicalIndexScan) addPushedDownSelection(copTask *copTask, p *DataSource, expectedCnt float64, path *indexPath) {
// Add filter condition to table plan now.
if len(is.filterCondition) > 0 {
var indexConds, tableConds []expression.Expression
if copTask.tablePlan != nil {
indexConds, tableConds = splitIndexFilterConditions(is.filterCondition, is.Index.Columns, is.Table)
} else {
indexConds = is.filterCondition
}
if indexConds != nil {
indexSel := PhysicalSelection{Conditions: indexConds}.init(is.ctx,
p.getStatsByFilter(append(is.AccessCondition, indexConds...)).scaleByExpectCnt(expectedCnt))
indexSel.SetChildren(is)
copTask.indexPlan = indexSel
copTask.cst += copTask.count() * cpuFactor
}
if tableConds != nil {
copTask.finishIndexPlan()
tableSel := PhysicalSelection{Conditions: tableConds}.init(is.ctx, p.statsAfterSelect.scaleByExpectCnt(expectedCnt))
tableSel.SetChildren(copTask.tablePlan)
copTask.tablePlan = tableSel
copTask.cst += copTask.count() * cpuFactor
}
indexConds, tableConds := path.indexFilters, path.tableFilters
if indexConds != nil {
stats := &statsInfo{count: path.countAfterIndex}
indexSel := PhysicalSelection{Conditions: indexConds}.init(is.ctx,
stats.scaleByExpectCnt(expectedCnt))
indexSel.SetChildren(is)
copTask.indexPlan = indexSel
copTask.cst += copTask.count() * cpuFactor
}
if tableConds != nil {
copTask.finishIndexPlan()
tableSel := PhysicalSelection{Conditions: tableConds}.init(is.ctx, p.statsAfterSelect.scaleByExpectCnt(expectedCnt))
tableSel.SetChildren(copTask.tablePlan)
copTask.tablePlan = tableSel
copTask.cst += copTask.count() * cpuFactor
}
}

Expand Down Expand Up @@ -524,7 +501,7 @@ func (ds *DataSource) forceToTableScan(pk *expression.Column) PhysicalPlan {
}

// convertToTableScan converts the DataSource to table scan.
func (ds *DataSource) convertToTableScan(prop *requiredProp) (task task, err error) {
func (ds *DataSource) convertToTableScan(prop *requiredProp, path *indexPath) (task task, err error) {
// It will be handled in convertToIndexScan.
if prop.taskTp == copDoubleReadTaskType {
return &copTask{cst: math.MaxFloat64}, nil
Expand All @@ -537,7 +514,6 @@ func (ds *DataSource) convertToTableScan(prop *requiredProp) (task task, err err
DBName: ds.DBName,
}.init(ds.ctx)
ts.SetSchema(ds.schema)
sc := ds.ctx.GetSessionVars().StmtCtx
var pkCol *expression.Column
if ts.Table.PKIsHandle {
if pkColInfo := ts.Table.GetPkColInfo(); pkColInfo != nil {
Expand All @@ -547,29 +523,9 @@ func (ds *DataSource) convertToTableScan(prop *requiredProp) (task task, err err
}
}
}
if pkCol != nil {
ts.Ranges = ranger.FullIntNewRange(mysql.HasUnsignedFlag(pkCol.RetType.Flag))
} else {
ts.Ranges = ranger.FullIntNewRange(false)
}
statsTbl := ds.statisticTable
rowCount := float64(statsTbl.Count)
if len(ds.pushedDownConds) > 0 {
if pkCol != nil {
ts.AccessCondition, ts.filterCondition = ranger.DetachCondsForTableRange(ds.ctx, ds.pushedDownConds, pkCol)
ts.Ranges, err = ranger.BuildTableRange(ts.AccessCondition, sc, pkCol.RetType)
if err != nil {
return nil, errors.Trace(err)
}
// TODO: We can use ds.getStatsByFilter(accessConditions).
rowCount, err = statsTbl.GetRowCountByIntColumnRanges(sc, pkCol.ID, ts.Ranges)
if err != nil {
return nil, errors.Trace(err)
}
} else {
ts.filterCondition = ds.pushedDownConds
}
}
ts.Ranges = path.ranges
ts.AccessCondition, ts.filterCondition = path.accessConds, path.tableFilters
rowCount := path.countAfterAccess
copTask := &copTask{
tablePlan: ts,
indexPlanFinished: true,
Expand All @@ -580,11 +536,7 @@ func (ds *DataSource) convertToTableScan(prop *requiredProp) (task task, err err
// e.g. IndexScan(count1)->After Filter(count2). The `ds.statsAfterSelect.count` is count2. count1 is the one we need to calculate
// If expectedCnt and count2 are both zero and we go into the below `if` block, the count1 will be set to zero though it shouldn't be.
if matchProperty && prop.expectedCnt < ds.statsAfterSelect.count {
selectivity, err := statsTbl.Selectivity(ds.ctx, ts.filterCondition)
if err != nil {
log.Warnf("An error happened: %v, we have to use the default selectivity", err.Error())
selectivity = selectionFactor
}
selectivity := ds.statsAfterSelect.count / rowCount
rowCount = math.Min(prop.expectedCnt/selectivity, rowCount)
}
ts.stats = ds.stats.scaleByExpectCnt(rowCount)
Expand Down
2 changes: 1 addition & 1 deletion plan/plan.go
Original file line number Diff line number Diff line change
Expand Up @@ -161,7 +161,7 @@ type LogicalPlan interface {
pushDownTopN(topN *LogicalTopN) LogicalPlan

// deriveStats derives statistic info between plans.
deriveStats() *statsInfo
deriveStats() (*statsInfo, error)

// preparePossibleProperties is only used for join and aggregation. Like group by a,b,c, all permutations of (a,b,c) are
// valid, but the ordered indices in the leaf plan are limited. So we can get all possible order properties by a pre-walking.
Expand Down
Loading