-
Notifications
You must be signed in to change notification settings - Fork 5.8k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
plan: calc access path
when doing deriveStats
.
#6346
Changes from 1 commit
391e389
2039b18
50fd959
ba77bdf
d1a7691
8192bfa
5d3620f
d170ac3
e528c54
8d5775b
e6bc81b
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -21,6 +21,7 @@ import ( | |
"github.com/pingcap/tidb/mysql" | ||
"github.com/pingcap/tidb/statistics" | ||
"github.com/pingcap/tidb/types" | ||
"github.com/pingcap/tidb/util/ranger" | ||
) | ||
|
||
var ( | ||
|
@@ -299,13 +300,23 @@ type DataSource struct { | |
|
||
statisticTable *statistics.Table | ||
|
||
// availableIndices is used for storing result of availableIndices function. | ||
availableIndices *availableIndices | ||
// possibleIndexPaths stores all the possible index paths for physical plan, including table scan. | ||
// Please make sure table scan is always the first element. | ||
possibleIndexPaths []*indexPath | ||
} | ||
|
||
type availableIndices struct { | ||
indices []*model.IndexInfo | ||
includeTableScan bool | ||
type indexPath struct { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Please add comments for this type. |
||
index *model.IndexInfo | ||
ranges []*ranger.NewRange | ||
countAfterAccess float64 | ||
countAfterIndex float64 | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Add comments about the two count. |
||
accessConds []expression.Expression | ||
eqCondCount int | ||
indexFilters []expression.Expression | ||
tableFilters []expression.Expression | ||
filterUnmatched bool | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. not used? |
||
isRowID bool | ||
forced bool | ||
} | ||
|
||
func (ds *DataSource) getPKIsHandleCol() *expression.Column { | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -24,7 +24,6 @@ import ( | |
"github.com/pingcap/tidb/mysql" | ||
"github.com/pingcap/tidb/types" | ||
"github.com/pingcap/tidb/util/ranger" | ||
log "github.com/sirupsen/logrus" | ||
) | ||
|
||
const ( | ||
|
@@ -202,30 +201,30 @@ func (ds *DataSource) findBestTask(prop *requiredProp) (task, error) { | |
return t, nil | ||
} | ||
|
||
indices := ds.availableIndices.indices | ||
includeTableScan := ds.availableIndices.includeTableScan | ||
t = invalidTask | ||
if includeTableScan { | ||
t, err = ds.convertToTableScan(prop) | ||
if err != nil { | ||
return nil, errors.Trace(err) | ||
} | ||
} | ||
if !includeTableScan || len(ds.pushedDownConds) > 0 || len(prop.cols) > 0 { | ||
for i, idx := range indices { | ||
// TODO: We can also check if the prop matches the index columns. | ||
if !ds.relevantIndices[i] && len(prop.cols) == 0 { | ||
continue | ||
} | ||
idxTask, err := ds.convertToIndexScan(prop, idx) | ||
|
||
for _, path := range ds.possibleIndexPaths { | ||
if path.isRowID { | ||
tblTask, err := ds.convertToTableScan(prop, path) | ||
if err != nil { | ||
return nil, errors.Trace(err) | ||
} | ||
if idxTask.cost() < t.cost() { | ||
t = idxTask | ||
if tblTask.cost() < t.cost() { | ||
t = tblTask | ||
} | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. how about adding a continue in this code block ? |
||
} else { | ||
if len(path.accessConds) > 0 || len(prop.cols) > 0 || path.forced { | ||
idxTask, err := ds.convertToIndexScan(prop, path) | ||
if err != nil { | ||
return nil, errors.Trace(err) | ||
} | ||
if idxTask.cost() < t.cost() { | ||
t = idxTask | ||
} | ||
} | ||
} | ||
} | ||
|
||
ds.storeTask(prop, t) | ||
return t, nil | ||
} | ||
|
@@ -276,7 +275,9 @@ func (ds *DataSource) forceToIndexScan(idx *model.IndexInfo, remainedConds []exp | |
cop.tablePlan = ts | ||
} | ||
is.initSchema(ds.id, idx, cop.tablePlan != nil) | ||
is.addPushedDownSelection(cop, ds, math.MaxFloat64) | ||
indexConds, tblConds := splitIndexFilterConditions(remainedConds, idx.Columns, ds.tableInfo) | ||
path := &indexPath{indexFilters: indexConds, tableFilters: tblConds, countAfterIndex: math.MaxFloat64} | ||
is.addPushedDownSelection(cop, ds, math.MaxFloat64, path) | ||
t := finishCopTask(ds.ctx, cop) | ||
return t.plan() | ||
} | ||
|
@@ -294,7 +295,8 @@ func (ts *PhysicalTableScan) appendExtraHandleCol(ds *DataSource) { | |
} | ||
|
||
// convertToIndexScan converts the DataSource to index scan with idx. | ||
func (ds *DataSource) convertToIndexScan(prop *requiredProp, idx *model.IndexInfo) (task task, err error) { | ||
func (ds *DataSource) convertToIndexScan(prop *requiredProp, path *indexPath) (task task, err error) { | ||
idx := path.index | ||
is := PhysicalIndexScan{ | ||
Table: ds.tableInfo, | ||
TableAsName: ds.TableAsName, | ||
|
@@ -308,25 +310,10 @@ func (ds *DataSource) convertToIndexScan(prop *requiredProp, idx *model.IndexInf | |
is.Hist = &statsTbl.Indices[idx.ID].Histogram | ||
} | ||
rowCount := float64(statsTbl.Count) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. can be removed |
||
sc := ds.ctx.GetSessionVars().StmtCtx | ||
idxCols, colLengths := expression.IndexInfo2Cols(ds.Schema().Columns, idx) | ||
is.Ranges = ranger.FullNewRange() | ||
eqCount := 0 | ||
if len(ds.pushedDownConds) > 0 { | ||
is.conditions = ds.pushedDownConds | ||
if len(idxCols) > 0 { | ||
is.Ranges, is.AccessCondition, is.filterCondition, eqCount, err = ranger.DetachCondAndBuildRangeForIndex(ds.ctx, ds.pushedDownConds, idxCols, colLengths) | ||
if err != nil { | ||
return nil, errors.Trace(err) | ||
} | ||
rowCount, err = statsTbl.GetRowCountByIndexRanges(sc, is.Index.ID, is.Ranges) | ||
if err != nil { | ||
return nil, errors.Trace(err) | ||
} | ||
} else { | ||
is.filterCondition = ds.pushedDownConds | ||
} | ||
} | ||
is.AccessCondition, is.Ranges, is.filterCondition, eqCount = path.accessConds, path.ranges, path.indexFilters, path.eqCondCount | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. this line is too long, can we split it to multi-lines ? |
||
rowCount = path.countAfterAccess | ||
cop := &copTask{indexPlan: is} | ||
if !isCoveringIndex(is.Columns, is.Index.Columns, is.Table.PKIsHandle) { | ||
// On this way, it's double read case. | ||
|
@@ -358,12 +345,8 @@ func (ds *DataSource) convertToIndexScan(prop *requiredProp, idx *model.IndexInf | |
// Only use expectedCnt when it's smaller than the count we calculated. | ||
// e.g. IndexScan(count1)->After Filter(count2). The `ds.statsAfterSelect.count` is count2. count1 is the one we need to calculate | ||
// If expectedCnt and count2 are both zero and we go into the below `if` block, the count1 will be set to zero though it shouldn't be. | ||
if matchProperty && prop.expectedCnt < ds.statsAfterSelect.count { | ||
selectivity, err := statsTbl.Selectivity(ds.ctx, is.filterCondition) | ||
if err != nil { | ||
log.Warnf("An error happened: %v, we have to use the default selectivity", err.Error()) | ||
selectivity = selectionFactor | ||
} | ||
if matchProperty && prop.expectedCnt < path.countAfterIndex { | ||
selectivity := path.countAfterIndex / path.countAfterAccess | ||
rowCount = math.Min(prop.expectedCnt/selectivity, rowCount) | ||
} | ||
is.stats = ds.stats.scaleByExpectCnt(rowCount) | ||
|
@@ -379,15 +362,15 @@ func (ds *DataSource) convertToIndexScan(prop *requiredProp, idx *model.IndexInf | |
} | ||
cop.keepOrder = true | ||
is.KeepOrder = true | ||
is.addPushedDownSelection(cop, ds, prop.expectedCnt) | ||
is.addPushedDownSelection(cop, ds, prop.expectedCnt, path) | ||
} else { | ||
expectedCnt := math.MaxFloat64 | ||
if prop.isEmpty() { | ||
expectedCnt = prop.expectedCnt | ||
} else { | ||
return invalidTask, nil | ||
} | ||
is.addPushedDownSelection(cop, ds, expectedCnt) | ||
is.addPushedDownSelection(cop, ds, expectedCnt, path) | ||
} | ||
if prop.taskTp == rootTaskType { | ||
task = finishCopTask(ds.ctx, task) | ||
|
@@ -418,29 +401,23 @@ func (is *PhysicalIndexScan) initSchema(id int, idx *model.IndexInfo, isDoubleRe | |
is.SetSchema(expression.NewSchema(indexCols...)) | ||
} | ||
|
||
func (is *PhysicalIndexScan) addPushedDownSelection(copTask *copTask, p *DataSource, expectedCnt float64) { | ||
func (is *PhysicalIndexScan) addPushedDownSelection(copTask *copTask, p *DataSource, expectedCnt float64, path *indexPath) { | ||
// Add filter condition to table plan now. | ||
if len(is.filterCondition) > 0 { | ||
var indexConds, tableConds []expression.Expression | ||
if copTask.tablePlan != nil { | ||
indexConds, tableConds = splitIndexFilterConditions(is.filterCondition, is.Index.Columns, is.Table) | ||
} else { | ||
indexConds = is.filterCondition | ||
} | ||
if indexConds != nil { | ||
indexSel := PhysicalSelection{Conditions: indexConds}.init(is.ctx, | ||
p.getStatsByFilter(append(is.AccessCondition, indexConds...)).scaleByExpectCnt(expectedCnt)) | ||
indexSel.SetChildren(is) | ||
copTask.indexPlan = indexSel | ||
copTask.cst += copTask.count() * cpuFactor | ||
} | ||
if tableConds != nil { | ||
copTask.finishIndexPlan() | ||
tableSel := PhysicalSelection{Conditions: tableConds}.init(is.ctx, p.statsAfterSelect.scaleByExpectCnt(expectedCnt)) | ||
tableSel.SetChildren(copTask.tablePlan) | ||
copTask.tablePlan = tableSel | ||
copTask.cst += copTask.count() * cpuFactor | ||
} | ||
indexConds, tableConds := path.indexFilters, path.tableFilters | ||
if indexConds != nil { | ||
stats := &statsInfo{count: path.countAfterIndex} | ||
indexSel := PhysicalSelection{Conditions: indexConds}.init(is.ctx, | ||
stats.scaleByExpectCnt(expectedCnt)) | ||
indexSel.SetChildren(is) | ||
copTask.indexPlan = indexSel | ||
copTask.cst += copTask.count() * cpuFactor | ||
} | ||
if tableConds != nil { | ||
copTask.finishIndexPlan() | ||
tableSel := PhysicalSelection{Conditions: tableConds}.init(is.ctx, p.statsAfterSelect.scaleByExpectCnt(expectedCnt)) | ||
tableSel.SetChildren(copTask.tablePlan) | ||
copTask.tablePlan = tableSel | ||
copTask.cst += copTask.count() * cpuFactor | ||
} | ||
} | ||
|
||
|
@@ -524,7 +501,7 @@ func (ds *DataSource) forceToTableScan(pk *expression.Column) PhysicalPlan { | |
} | ||
|
||
// convertToTableScan converts the DataSource to table scan. | ||
func (ds *DataSource) convertToTableScan(prop *requiredProp) (task task, err error) { | ||
func (ds *DataSource) convertToTableScan(prop *requiredProp, path *indexPath) (task task, err error) { | ||
// It will be handled in convertToIndexScan. | ||
if prop.taskTp == copDoubleReadTaskType { | ||
return &copTask{cst: math.MaxFloat64}, nil | ||
|
@@ -537,7 +514,6 @@ func (ds *DataSource) convertToTableScan(prop *requiredProp) (task task, err err | |
DBName: ds.DBName, | ||
}.init(ds.ctx) | ||
ts.SetSchema(ds.schema) | ||
sc := ds.ctx.GetSessionVars().StmtCtx | ||
var pkCol *expression.Column | ||
if ts.Table.PKIsHandle { | ||
if pkColInfo := ts.Table.GetPkColInfo(); pkColInfo != nil { | ||
|
@@ -547,29 +523,9 @@ func (ds *DataSource) convertToTableScan(prop *requiredProp) (task task, err err | |
} | ||
} | ||
} | ||
if pkCol != nil { | ||
ts.Ranges = ranger.FullIntNewRange(mysql.HasUnsignedFlag(pkCol.RetType.Flag)) | ||
} else { | ||
ts.Ranges = ranger.FullIntNewRange(false) | ||
} | ||
statsTbl := ds.statisticTable | ||
rowCount := float64(statsTbl.Count) | ||
if len(ds.pushedDownConds) > 0 { | ||
if pkCol != nil { | ||
ts.AccessCondition, ts.filterCondition = ranger.DetachCondsForTableRange(ds.ctx, ds.pushedDownConds, pkCol) | ||
ts.Ranges, err = ranger.BuildTableRange(ts.AccessCondition, sc, pkCol.RetType) | ||
if err != nil { | ||
return nil, errors.Trace(err) | ||
} | ||
// TODO: We can use ds.getStatsByFilter(accessConditions). | ||
rowCount, err = statsTbl.GetRowCountByIntColumnRanges(sc, pkCol.ID, ts.Ranges) | ||
if err != nil { | ||
return nil, errors.Trace(err) | ||
} | ||
} else { | ||
ts.filterCondition = ds.pushedDownConds | ||
} | ||
} | ||
ts.Ranges = path.ranges | ||
ts.AccessCondition, ts.filterCondition = path.accessConds, path.tableFilters | ||
rowCount := path.countAfterAccess | ||
copTask := &copTask{ | ||
tablePlan: ts, | ||
indexPlanFinished: true, | ||
|
@@ -580,11 +536,7 @@ func (ds *DataSource) convertToTableScan(prop *requiredProp) (task task, err err | |
// e.g. IndexScan(count1)->After Filter(count2). The `ds.statsAfterSelect.count` is count2. count1 is the one we need to calculate | ||
// If expectedCnt and count2 are both zero and we go into the below `if` block, the count1 will be set to zero though it shouldn't be. | ||
if matchProperty && prop.expectedCnt < ds.statsAfterSelect.count { | ||
selectivity, err := statsTbl.Selectivity(ds.ctx, ts.filterCondition) | ||
if err != nil { | ||
log.Warnf("An error happened: %v, we have to use the default selectivity", err.Error()) | ||
selectivity = selectionFactor | ||
} | ||
selectivity := ds.statsAfterSelect.count / rowCount | ||
rowCount = math.Min(prop.expectedCnt/selectivity, rowCount) | ||
} | ||
ts.stats = ds.stats.scaleByExpectCnt(rowCount) | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
make sure