From 50426a99b20a4995ae440c870a0d90c83030100d Mon Sep 17 00:00:00 2001 From: Yiding Cui Date: Sat, 28 Apr 2018 15:58:31 +0800 Subject: [PATCH] plan: calculate table access paths in `DataSource.deriveStats` (#6346) --- plan/build_key_info.go | 7 +- plan/gen_physical_plans.go | 17 +++- plan/logical_plan_builder.go | 14 ++-- plan/logical_plans.go | 87 ++++++++++++++++++-- plan/optimizer.go | 5 +- plan/physical_plan_builder.go | 145 ++++++++++++---------------------- plan/plan.go | 2 +- plan/planbuilder.go | 65 ++++++++------- plan/property_cols_prune.go | 36 +++------ plan/stats.go | 137 ++++++++++++++++++++++---------- 10 files changed, 303 insertions(+), 212 deletions(-) diff --git a/plan/build_key_info.go b/plan/build_key_info.go index 639f2230df5c0..24cf42072bc7a 100644 --- a/plan/build_key_info.go +++ b/plan/build_key_info.go @@ -174,8 +174,11 @@ func (p *LogicalJoin) buildKeyInfo() { func (ds *DataSource) buildKeyInfo() { ds.schema.Keys = nil ds.baseLogicalPlan.buildKeyInfo() - indices := ds.availableIndices.indices - for _, idx := range indices { + for _, path := range ds.possibleAccessPaths { + if path.isTablePath { + continue + } + idx := path.index if !idx.Unique { continue } diff --git a/plan/gen_physical_plans.go b/plan/gen_physical_plans.go index c5b9691042cd6..1a665e56d8593 100644 --- a/plan/gen_physical_plans.go +++ b/plan/gen_physical_plans.go @@ -256,9 +256,14 @@ func (p *LogicalJoin) getIndexJoinByOuterIdx(prop *requiredProp, outerIdx int) [ if !ok { return nil } - indices := x.availableIndices.indices - includeTableScan := x.availableIndices.includeTableScan - if includeTableScan && len(innerJoinKeys) == 1 { + var tblPath *accessPath + for _, path := range x.possibleAccessPaths { + if path.isTablePath { + tblPath = path + break + } + } + if tblPath != nil && len(innerJoinKeys) == 1 { pkCol := x.getPKIsHandleCol() if pkCol != nil && innerJoinKeys[0].Equal(nil, pkCol) { innerPlan := x.forceToTableScan(pkCol) @@ -272,7 +277,11 @@ func (p *LogicalJoin) getIndexJoinByOuterIdx(prop *requiredProp, outerIdx int) [ remainedOfBest []expression.Expression keyOff2IdxOff []int ) - for _, indexInfo := range indices { + for _, path := range x.possibleAccessPaths { + if path.isTablePath { + continue + } + indexInfo := path.index ranges, remained, tmpKeyOff2IdxOff := p.buildRangeForIndexJoin(indexInfo, x, innerJoinKeys) // We choose the index by the number of used columns of the range, the much the better. // Notice that there may be the cases like `t1.a=t2.a and b > 2 and b < 1`. So ranges can be nil though the conditions are valid. diff --git a/plan/logical_plan_builder.go b/plan/logical_plan_builder.go index 3d4be5dc6c067..d1f9986cc925c 100644 --- a/plan/logical_plan_builder.go +++ b/plan/logical_plan_builder.go @@ -1671,7 +1671,7 @@ func (b *planBuilder) buildDataSource(tn *ast.TableName) LogicalPlan { tableInfo := tbl.Meta() b.visitInfo = appendVisitInfo(b.visitInfo, mysql.SelectPriv, dbName.L, tableInfo.Name.L, "") - availableIdxes, err := getAvailableIndices(tn.IndexHints, tableInfo) + possiblePaths, err := getPossibleAccessPaths(tn.IndexHints, tableInfo) if err != nil { b.err = errors.Trace(err) return nil @@ -1685,12 +1685,12 @@ func (b *planBuilder) buildDataSource(tn *ast.TableName) LogicalPlan { } ds := DataSource{ - DBName: dbName, - tableInfo: tableInfo, - statisticTable: b.getStatsTable(tableInfo), - indexHints: tn.IndexHints, - availableIndices: availableIdxes, - Columns: make([]*model.ColumnInfo, 0, len(columns)), + DBName: dbName, + tableInfo: tableInfo, + statisticTable: b.getStatsTable(tableInfo), + indexHints: tn.IndexHints, + possibleAccessPaths: possiblePaths, + Columns: make([]*model.ColumnInfo, 0, len(columns)), }.init(b.ctx) var handleCol *expression.Column diff --git a/plan/logical_plans.go b/plan/logical_plans.go index fe484a304a0f5..53fd2b4e3e9bb 100644 --- a/plan/logical_plans.go +++ b/plan/logical_plans.go @@ -14,6 +14,9 @@ package plan import ( + "math" + + "github.com/juju/errors" "github.com/pingcap/tidb/ast" "github.com/pingcap/tidb/expression" "github.com/pingcap/tidb/expression/aggregation" @@ -21,6 +24,8 @@ import ( "github.com/pingcap/tidb/mysql" "github.com/pingcap/tidb/statistics" "github.com/pingcap/tidb/types" + "github.com/pingcap/tidb/util/ranger" + log "github.com/sirupsen/logrus" ) var ( @@ -299,13 +304,85 @@ type DataSource struct { statisticTable *statistics.Table - // availableIndices is used for storing result of availableIndices function. - availableIndices *availableIndices + // possibleAccessPaths stores all the possible access path for physical plan, including table scan. + possibleAccessPaths []*accessPath +} + +// accessPath tells how we access one index or just access table. +type accessPath struct { + index *model.IndexInfo + ranges []*ranger.Range + // countAfterAccess is the row count after we apply range seek and before we use other filter to filter data. + countAfterAccess float64 + // countAfterIndex is the row count after we apply filters on index and before we apply the table filters. + countAfterIndex float64 + accessConds []expression.Expression + eqCondCount int + indexFilters []expression.Expression + tableFilters []expression.Expression + // isTablePath indicates whether this path is table path. + isTablePath bool + // forced means this path is generated by `use/force index()`. + forced bool +} + +func (ds *DataSource) deriveTablePathStats(path *accessPath) error { + var err error + sc := ds.ctx.GetSessionVars().StmtCtx + path.countAfterAccess = float64(ds.statisticTable.Count) + path.tableFilters = ds.pushedDownConds + var pkCol *expression.Column + if ds.tableInfo.PKIsHandle { + if pkColInfo := ds.tableInfo.GetPkColInfo(); pkColInfo != nil { + pkCol = expression.ColInfo2Col(ds.schema.Columns, pkColInfo) + } + } + if pkCol == nil { + path.ranges = ranger.FullIntRange(false) + return nil + } + path.ranges = ranger.FullIntRange(mysql.HasUnsignedFlag(pkCol.RetType.Flag)) + if len(ds.pushedDownConds) == 0 { + return nil + } + path.accessConds, path.tableFilters = ranger.DetachCondsForTableRange(ds.ctx, ds.pushedDownConds, pkCol) + path.ranges, err = ranger.BuildTableRange(path.accessConds, sc, pkCol.RetType) + if err != nil { + return errors.Trace(err) + } + path.countAfterAccess, err = ds.statisticTable.GetRowCountByIntColumnRanges(sc, pkCol.ID, path.ranges) + return errors.Trace(err) } -type availableIndices struct { - indices []*model.IndexInfo - includeTableScan bool +func (ds *DataSource) deriveIndexPathStats(path *accessPath) error { + var err error + sc := ds.ctx.GetSessionVars().StmtCtx + path.ranges = ranger.FullRange() + path.countAfterAccess = float64(ds.statisticTable.Count) + idxCols, lengths := expression.IndexInfo2Cols(ds.schema.Columns, path.index) + if len(idxCols) != 0 { + path.ranges, path.accessConds, path.indexFilters, path.eqCondCount, err = ranger.DetachCondAndBuildRangeForIndex(ds.ctx, ds.pushedDownConds, idxCols, lengths) + if err != nil { + return errors.Trace(err) + } + path.countAfterAccess, err = ds.statisticTable.GetRowCountByIndexRanges(sc, path.index.ID, path.ranges) + if err != nil { + return errors.Trace(err) + } + path.indexFilters, path.tableFilters = splitIndexFilterConditions(path.indexFilters, path.index.Columns, ds.tableInfo) + } else { + path.indexFilters, path.tableFilters = splitIndexFilterConditions(ds.pushedDownConds, path.index.Columns, ds.tableInfo) + } + path.countAfterIndex = path.countAfterAccess + if path.indexFilters != nil { + selectivity, err := ds.statisticTable.Selectivity(ds.ctx, path.indexFilters) + if err != nil { + log.Warnf("An error happened: %v, we have to use the default selectivity", err.Error()) + selectivity = selectionFactor + } + path.countAfterIndex = math.Max(path.countAfterAccess*selectivity, ds.statsAfterSelect.count) + } + return nil } func (ds *DataSource) getPKIsHandleCol() *expression.Column { diff --git a/plan/optimizer.go b/plan/optimizer.go index 47e0de2a08035..9c7c01ea7f390 100644 --- a/plan/optimizer.go +++ b/plan/optimizer.go @@ -147,7 +147,10 @@ func logicalOptimize(flag uint64, logic LogicalPlan) (LogicalPlan, error) { func physicalOptimize(logic LogicalPlan) (PhysicalPlan, error) { logic.preparePossibleProperties() - logic.deriveStats() + _, err := logic.deriveStats() + if err != nil { + return nil, errors.Trace(err) + } t, err := logic.findBestTask(&requiredProp{taskTp: rootTaskType, expectedCnt: math.MaxFloat64}) if err != nil { return nil, errors.Trace(err) diff --git a/plan/physical_plan_builder.go b/plan/physical_plan_builder.go index 5193412e6a472..cb931305a9699 100644 --- a/plan/physical_plan_builder.go +++ b/plan/physical_plan_builder.go @@ -23,7 +23,6 @@ import ( "github.com/pingcap/tidb/mysql" "github.com/pingcap/tidb/types" "github.com/pingcap/tidb/util/ranger" - log "github.com/sirupsen/logrus" ) const ( @@ -197,22 +196,25 @@ func (ds *DataSource) findBestTask(prop *requiredProp) (task, error) { return t, nil } - indices := ds.availableIndices.indices - includeTableScan := ds.availableIndices.includeTableScan t = invalidTask - if includeTableScan { - t, err = ds.convertToTableScan(prop) - if err != nil { - return nil, errors.Trace(err) - } - } - if !includeTableScan || len(ds.pushedDownConds) > 0 || len(prop.cols) > 0 { - for i, idx := range indices { - // TODO: We can also check if the prop matches the index columns. - if !ds.relevantIndices[i] && len(prop.cols) == 0 { - continue + + for _, path := range ds.possibleAccessPaths { + if path.isTablePath { + tblTask, err := ds.convertToTableScan(prop, path) + if err != nil { + return nil, errors.Trace(err) + } + if tblTask.cost() < t.cost() { + t = tblTask } - idxTask, err := ds.convertToIndexScan(prop, idx) + continue + } + // We will use index to generate physical plan if: + // this path's access cond is not nil or + // we have prop to match or + // this index is forced to choose. + if len(path.accessConds) > 0 || len(prop.cols) > 0 || path.forced { + idxTask, err := ds.convertToIndexScan(prop, path) if err != nil { return nil, errors.Trace(err) } @@ -221,6 +223,7 @@ func (ds *DataSource) findBestTask(prop *requiredProp) (task, error) { } } } + ds.storeTask(prop, t) return t, nil } @@ -271,7 +274,9 @@ func (ds *DataSource) forceToIndexScan(idx *model.IndexInfo, remainedConds []exp cop.tablePlan = ts } is.initSchema(ds.id, idx, cop.tablePlan != nil) - is.addPushedDownSelection(cop, ds, math.MaxFloat64) + indexConds, tblConds := splitIndexFilterConditions(remainedConds, idx.Columns, ds.tableInfo) + path := &accessPath{indexFilters: indexConds, tableFilters: tblConds, countAfterIndex: math.MaxFloat64} + is.addPushedDownSelection(cop, ds, math.MaxFloat64, path) t := finishCopTask(ds.ctx, cop) return t.plan() } @@ -289,7 +294,8 @@ func (ts *PhysicalTableScan) appendExtraHandleCol(ds *DataSource) { } // convertToIndexScan converts the DataSource to index scan with idx. -func (ds *DataSource) convertToIndexScan(prop *requiredProp, idx *model.IndexInfo) (task task, err error) { +func (ds *DataSource) convertToIndexScan(prop *requiredProp, path *accessPath) (task task, err error) { + idx := path.index is := PhysicalIndexScan{ Table: ds.tableInfo, TableAsName: ds.TableAsName, @@ -302,26 +308,10 @@ func (ds *DataSource) convertToIndexScan(prop *requiredProp, idx *model.IndexInf if statsTbl.Indices[idx.ID] != nil { is.Hist = &statsTbl.Indices[idx.ID].Histogram } - rowCount := float64(statsTbl.Count) - sc := ds.ctx.GetSessionVars().StmtCtx - idxCols, colLengths := expression.IndexInfo2Cols(ds.Schema().Columns, idx) is.Ranges = ranger.FullRange() eqCount := 0 - if len(ds.pushedDownConds) > 0 { - is.conditions = ds.pushedDownConds - if len(idxCols) > 0 { - is.Ranges, is.AccessCondition, is.filterCondition, eqCount, err = ranger.DetachCondAndBuildRangeForIndex(ds.ctx, ds.pushedDownConds, idxCols, colLengths) - if err != nil { - return nil, errors.Trace(err) - } - rowCount, err = statsTbl.GetRowCountByIndexRanges(sc, is.Index.ID, is.Ranges) - if err != nil { - return nil, errors.Trace(err) - } - } else { - is.filterCondition = ds.pushedDownConds - } - } + is.AccessCondition, is.Ranges, is.filterCondition, eqCount = path.accessConds, path.ranges, path.indexFilters, path.eqCondCount + rowCount := path.countAfterAccess cop := &copTask{indexPlan: is} if !isCoveringIndex(is.Columns, is.Index.Columns, is.Table.PKIsHandle) { // On this way, it's double read case. @@ -353,12 +343,8 @@ func (ds *DataSource) convertToIndexScan(prop *requiredProp, idx *model.IndexInf // Only use expectedCnt when it's smaller than the count we calculated. // e.g. IndexScan(count1)->After Filter(count2). The `ds.statsAfterSelect.count` is count2. count1 is the one we need to calculate // If expectedCnt and count2 are both zero and we go into the below `if` block, the count1 will be set to zero though it's shouldn't be. - if matchProperty && prop.expectedCnt < ds.statsAfterSelect.count { - selectivity, err := statsTbl.Selectivity(ds.ctx, is.filterCondition) - if err != nil { - log.Warnf("An error happened: %v, we have to use the default selectivity", err.Error()) - selectivity = selectionFactor - } + if matchProperty && prop.expectedCnt < path.countAfterIndex { + selectivity := path.countAfterIndex / path.countAfterAccess rowCount = math.Min(prop.expectedCnt/selectivity, rowCount) } is.stats = ds.stats.scaleByExpectCnt(rowCount) @@ -374,7 +360,7 @@ func (ds *DataSource) convertToIndexScan(prop *requiredProp, idx *model.IndexInf } cop.keepOrder = true is.KeepOrder = true - is.addPushedDownSelection(cop, ds, prop.expectedCnt) + is.addPushedDownSelection(cop, ds, prop.expectedCnt, path) } else { expectedCnt := math.MaxFloat64 if prop.isEmpty() { @@ -382,7 +368,7 @@ func (ds *DataSource) convertToIndexScan(prop *requiredProp, idx *model.IndexInf } else { return invalidTask, nil } - is.addPushedDownSelection(cop, ds, expectedCnt) + is.addPushedDownSelection(cop, ds, expectedCnt, path) } if prop.taskTp == rootTaskType { task = finishCopTask(ds.ctx, task) @@ -413,29 +399,23 @@ func (is *PhysicalIndexScan) initSchema(id int, idx *model.IndexInfo, isDoubleRe is.SetSchema(expression.NewSchema(indexCols...)) } -func (is *PhysicalIndexScan) addPushedDownSelection(copTask *copTask, p *DataSource, expectedCnt float64) { +func (is *PhysicalIndexScan) addPushedDownSelection(copTask *copTask, p *DataSource, expectedCnt float64, path *accessPath) { // Add filter condition to table plan now. - if len(is.filterCondition) > 0 { - var indexConds, tableConds []expression.Expression - if copTask.tablePlan != nil { - indexConds, tableConds = splitIndexFilterConditions(is.filterCondition, is.Index.Columns, is.Table) - } else { - indexConds = is.filterCondition - } - if indexConds != nil { - indexSel := PhysicalSelection{Conditions: indexConds}.init(is.ctx, - p.getStatsByFilter(append(is.AccessCondition, indexConds...)).scaleByExpectCnt(expectedCnt)) - indexSel.SetChildren(is) - copTask.indexPlan = indexSel - copTask.cst += copTask.count() * cpuFactor - } - if tableConds != nil { - copTask.finishIndexPlan() - tableSel := PhysicalSelection{Conditions: tableConds}.init(is.ctx, p.statsAfterSelect.scaleByExpectCnt(expectedCnt)) - tableSel.SetChildren(copTask.tablePlan) - copTask.tablePlan = tableSel - copTask.cst += copTask.count() * cpuFactor - } + indexConds, tableConds := path.indexFilters, path.tableFilters + if indexConds != nil { + stats := &statsInfo{count: path.countAfterIndex} + indexSel := PhysicalSelection{Conditions: indexConds}.init(is.ctx, + stats.scaleByExpectCnt(expectedCnt)) + indexSel.SetChildren(is) + copTask.indexPlan = indexSel + copTask.cst += copTask.count() * cpuFactor + } + if tableConds != nil { + copTask.finishIndexPlan() + tableSel := PhysicalSelection{Conditions: tableConds}.init(is.ctx, p.statsAfterSelect.scaleByExpectCnt(expectedCnt)) + tableSel.SetChildren(copTask.tablePlan) + copTask.tablePlan = tableSel + copTask.cst += copTask.count() * cpuFactor } } @@ -519,7 +499,7 @@ func (ds *DataSource) forceToTableScan(pk *expression.Column) PhysicalPlan { } // convertToTableScan converts the DataSource to table scan. -func (ds *DataSource) convertToTableScan(prop *requiredProp) (task task, err error) { +func (ds *DataSource) convertToTableScan(prop *requiredProp, path *accessPath) (task task, err error) { // It will be handled in convertToIndexScan. if prop.taskTp == copDoubleReadTaskType { return &copTask{cst: math.MaxFloat64}, nil @@ -532,7 +512,6 @@ func (ds *DataSource) convertToTableScan(prop *requiredProp) (task task, err err DBName: ds.DBName, }.init(ds.ctx) ts.SetSchema(ds.schema) - sc := ds.ctx.GetSessionVars().StmtCtx var pkCol *expression.Column if ts.Table.PKIsHandle { if pkColInfo := ts.Table.GetPkColInfo(); pkColInfo != nil { @@ -542,29 +521,9 @@ func (ds *DataSource) convertToTableScan(prop *requiredProp) (task task, err err } } } - if pkCol != nil { - ts.Ranges = ranger.FullIntRange(mysql.HasUnsignedFlag(pkCol.RetType.Flag)) - } else { - ts.Ranges = ranger.FullIntRange(false) - } - statsTbl := ds.statisticTable - rowCount := float64(statsTbl.Count) - if len(ds.pushedDownConds) > 0 { - if pkCol != nil { - ts.AccessCondition, ts.filterCondition = ranger.DetachCondsForTableRange(ds.ctx, ds.pushedDownConds, pkCol) - ts.Ranges, err = ranger.BuildTableRange(ts.AccessCondition, sc, pkCol.RetType) - if err != nil { - return nil, errors.Trace(err) - } - // TODO: We can use ds.getStatsByFilter(accessConditions). - rowCount, err = statsTbl.GetRowCountByIntColumnRanges(sc, pkCol.ID, ts.Ranges) - if err != nil { - return nil, errors.Trace(err) - } - } else { - ts.filterCondition = ds.pushedDownConds - } - } + ts.Ranges = path.ranges + ts.AccessCondition, ts.filterCondition = path.accessConds, path.tableFilters + rowCount := path.countAfterAccess copTask := &copTask{ tablePlan: ts, indexPlanFinished: true, @@ -575,11 +534,7 @@ func (ds *DataSource) convertToTableScan(prop *requiredProp) (task task, err err // e.g. IndexScan(count1)->After Filter(count2). The `ds.statsAfterSelect.count` is count2. count1 is the one we need to calculate // If expectedCnt and count2 are both zero and we go into the below `if` block, the count1 will be set to zero though it's shouldn't be. if matchProperty && prop.expectedCnt < ds.statsAfterSelect.count { - selectivity, err := statsTbl.Selectivity(ds.ctx, ts.filterCondition) - if err != nil { - log.Warnf("An error happened: %v, we have to use the default selectivity", err.Error()) - selectivity = selectionFactor - } + selectivity := ds.statsAfterSelect.count / rowCount rowCount = math.Min(prop.expectedCnt/selectivity, rowCount) } ts.stats = ds.stats.scaleByExpectCnt(rowCount) diff --git a/plan/plan.go b/plan/plan.go index 5296b44c35b81..84344be0f5417 100644 --- a/plan/plan.go +++ b/plan/plan.go @@ -161,7 +161,7 @@ type LogicalPlan interface { pushDownTopN(topN *LogicalTopN) LogicalPlan // deriveStats derives statistic info between plans. - deriveStats() *statsInfo + deriveStats() (*statsInfo, error) // preparePossibleProperties is only used for join and aggregation. Like group by a,b,c, all permutation of (a,b,c) is // valid, but the ordered indices in leaf plan is limited. So we can get all possible order properties by a pre-walking. diff --git a/plan/planbuilder.go b/plan/planbuilder.go index 837c7743dec4a..52fd021569084 100644 --- a/plan/planbuilder.go +++ b/plan/planbuilder.go @@ -317,17 +317,27 @@ func (b *planBuilder) detectSelectAgg(sel *ast.SelectStmt) bool { return false } -func getAvailableIndices(indexHints []*ast.IndexHint, tableInfo *model.TableInfo) (*availableIndices, error) { - publicIndices := make([]*model.IndexInfo, 0, len(tableInfo.Indices)) - for _, index := range tableInfo.Indices { +func getPathByIndexName(paths []*accessPath, idxName model.CIStr) *accessPath { + for _, path := range paths { + if path.index.Name.L == idxName.L { + return path + } + } + return nil +} + +func getPossibleAccessPaths(indexHints []*ast.IndexHint, tblInfo *model.TableInfo) ([]*accessPath, error) { + publicPaths := make([]*accessPath, 0, len(tblInfo.Indices)+1) + publicPaths = append(publicPaths, &accessPath{isTablePath: true}) + for _, index := range tblInfo.Indices { if index.State == model.StatePublic { - publicIndices = append(publicIndices, index) + publicPaths = append(publicPaths, &accessPath{index: index}) } } hasScanHint, hasUseOrForce := false, false - available := make([]*model.IndexInfo, 0, len(indexHints)) - ignored := make([]*model.IndexInfo, 0, len(indexHints)) + available := make([]*accessPath, 0, len(publicPaths)) + ignored := make([]*accessPath, 0, len(publicPaths)) for _, hint := range indexHints { if hint.HintScope != ast.HintForScan { continue @@ -335,47 +345,48 @@ func getAvailableIndices(indexHints []*ast.IndexHint, tableInfo *model.TableInfo hasScanHint = true for _, idxName := range hint.IndexNames { - idx := findIndexByName(publicIndices, idxName) - if idx == nil { - return nil, ErrKeyDoesNotExist.GenByArgs(idxName, tableInfo.Name) + path := getPathByIndexName(publicPaths[1:], idxName) + if path == nil { + return nil, ErrKeyDoesNotExist.GenByArgs(idxName, tblInfo.Name) } if hint.HintType == ast.HintIgnore { // Collect all the ignored index hints. - ignored = append(ignored, idx) + ignored = append(ignored, path) continue } // Currently we don't distinguish between "FORCE" and "USE" because // our cost estimation is not reliable. hasUseOrForce = true - available = append(available, idx) + path.forced = true + available = append(available, path) } } - if !hasScanHint { - return &availableIndices{publicIndices, true}, nil - } - if !hasUseOrForce { - available = removeIgnoredIndices(publicIndices, ignored) - return &availableIndices{available, true}, nil + if !hasScanHint || !hasUseOrForce { + available = publicPaths } - available = removeIgnoredIndices(available, ignored) + available = removeIgnoredPaths(available, ignored) + // If we have got "FORCE" or "USE" index hint but got no available index, // we have to use table scan. - return &availableIndices{available, len(available) == 0}, nil + if len(available) == 0 { + available = append(available, &accessPath{isTablePath: true}) + } + return available, nil } -func removeIgnoredIndices(indices, ignores []*model.IndexInfo) []*model.IndexInfo { - if len(ignores) == 0 { - return indices +func removeIgnoredPaths(paths, ignoredPaths []*accessPath) []*accessPath { + if len(ignoredPaths) == 0 { + return paths } - var remainedIndices []*model.IndexInfo - for _, index := range indices { - if findIndexByName(ignores, index.Name) == nil { - remainedIndices = append(remainedIndices, index) + remainedPaths := make([]*accessPath, 0, len(paths)) + for _, path := range paths { + if path.isTablePath || getPathByIndexName(ignoredPaths, path.index.Name) == nil { + remainedPaths = append(remainedPaths, path) } } - return remainedIndices + return remainedPaths } func findIndexByName(indices []*model.IndexInfo, name model.CIStr) *model.IndexInfo { diff --git a/plan/property_cols_prune.go b/plan/property_cols_prune.go index 5449ed87ae3c4..1333c9610f255 100644 --- a/plan/property_cols_prune.go +++ b/plan/property_cols_prune.go @@ -18,35 +18,17 @@ import ( ) func (ds *DataSource) preparePossibleProperties() [][]*expression.Column { - indices := ds.availableIndices.indices - includeTS := ds.availableIndices.includeTableScan + result := make([][]*expression.Column, 0, len(ds.possibleAccessPaths)) - result := make([][]*expression.Column, 0, len(indices)) - ds.relevantIndices = make([]bool, len(indices)) - - if includeTS { - col := ds.getPKIsHandleCol() - if col != nil { - result = append(result, []*expression.Column{col}) - } - - cols := expression.ExtractColumnsFromExpressions(make([]*expression.Column, 0, 10), ds.pushedDownConds, nil) - colsSet := make(map[string]struct{}, len(cols)) - for _, col := range cols { - colsSet[col.ColName.L] = struct{}{} - } - - for i, idx := range indices { - _, ok := colsSet[idx.Columns[0].Name.L] - ds.relevantIndices[i] = ok - } - } else { - for i := range ds.relevantIndices { - ds.relevantIndices[i] = true + for _, path := range ds.possibleAccessPaths { + if path.isTablePath { + col := ds.getPKIsHandleCol() + if col != nil { + result = append(result, []*expression.Column{col}) + } + continue } - } - for _, idx := range indices { - cols, _ := expression.IndexInfo2Cols(ds.schema.Columns, idx) + cols, _ := expression.IndexInfo2Cols(ds.schema.Columns, path.index) if len(cols) > 0 { result = append(result, cols) } diff --git a/plan/stats.go b/plan/stats.go index b1d7ea7081264..c10b133f94f91 100644 --- a/plan/stats.go +++ b/plan/stats.go @@ -17,6 +17,7 @@ import ( "fmt" "math" + "github.com/juju/errors" "github.com/pingcap/tidb/expression" log "github.com/sirupsen/logrus" ) @@ -62,7 +63,7 @@ func (p *basePhysicalPlan) StatsInfo() *statsInfo { return p.stats } -func (p *LogicalTableDual) deriveStats() *statsInfo { +func (p *LogicalTableDual) deriveStats() (*statsInfo, error) { profile := &statsInfo{ count: float64(p.RowCount), cardinality: make([]float64, p.Schema().Len()), @@ -71,17 +72,18 @@ func (p *LogicalTableDual) deriveStats() *statsInfo { profile.cardinality[i] = float64(p.RowCount) } p.stats = profile - return p.stats + return p.stats, nil } -func (p *baseLogicalPlan) deriveStats() *statsInfo { +func (p *baseLogicalPlan) deriveStats() (*statsInfo, error) { if len(p.children) > 1 { panic("LogicalPlans with more than one child should implement their own deriveStats().") } if len(p.children) == 1 { - p.stats = p.children[0].deriveStats() - return p.stats + var err error + p.stats, err = p.children[0].deriveStats() + return p.stats, errors.Trace(err) } profile := &statsInfo{ @@ -92,7 +94,7 @@ func (p *baseLogicalPlan) deriveStats() *statsInfo { profile.cardinality[i] = float64(1) } p.stats = profile - return profile + return profile, nil } func (ds *DataSource) getStatsByFilter(conds expression.CNFExprs) *statsInfo { @@ -118,37 +120,59 @@ func (ds *DataSource) getStatsByFilter(conds expression.CNFExprs) *statsInfo { return profile.scale(selectivity) } -func (ds *DataSource) deriveStats() *statsInfo { +func (ds *DataSource) deriveStats() (*statsInfo, error) { // PushDownNot here can convert query 'not (a != 1)' to 'a = 1'. for i, expr := range ds.pushedDownConds { ds.pushedDownConds[i] = expression.PushDownNot(nil, expr, false) } ds.statsAfterSelect = ds.getStatsByFilter(ds.pushedDownConds) - return ds.statsAfterSelect + for _, path := range ds.possibleAccessPaths { + if path.isTablePath { + err := ds.deriveTablePathStats(path) + if err != nil { + return nil, errors.Trace(err) + } + continue + } + err := ds.deriveIndexPathStats(path) + if err != nil { + return nil, errors.Trace(err) + } + } + return ds.statsAfterSelect, nil } -func (p *LogicalSelection) deriveStats() *statsInfo { - childProfile := p.children[0].deriveStats() +func (p *LogicalSelection) deriveStats() (*statsInfo, error) { + childProfile, err := p.children[0].deriveStats() + if err != nil { + return nil, errors.Trace(err) + } p.stats = childProfile.scale(selectionFactor) - return p.stats + return p.stats, nil } -func (p *LogicalUnionAll) deriveStats() *statsInfo { +func (p *LogicalUnionAll) deriveStats() (*statsInfo, error) { p.stats = &statsInfo{ cardinality: make([]float64, p.Schema().Len()), } for _, child := range p.children { - childProfile := child.deriveStats() + childProfile, err := child.deriveStats() + if err != nil { + return nil, errors.Trace(err) + } p.stats.count += childProfile.count for i := range p.stats.cardinality { p.stats.cardinality[i] += childProfile.cardinality[i] } } - return p.stats + return p.stats, nil } -func (p *LogicalLimit) deriveStats() *statsInfo { - childProfile := p.children[0].deriveStats() +func (p *LogicalLimit) deriveStats() (*statsInfo, error) { + childProfile, err := p.children[0].deriveStats() + if err != nil { + return nil, errors.Trace(err) + } p.stats = &statsInfo{ count: math.Min(float64(p.Count), childProfile.count), cardinality: make([]float64, len(childProfile.cardinality)), @@ -156,11 +180,14 @@ func (p *LogicalLimit) deriveStats() *statsInfo { for i := range p.stats.cardinality { p.stats.cardinality[i] = math.Min(childProfile.cardinality[i], p.stats.count) } - return p.stats + return p.stats, nil } -func (lt *LogicalTopN) deriveStats() *statsInfo { - childProfile := lt.children[0].deriveStats() +func (lt *LogicalTopN) deriveStats() (*statsInfo, error) { + childProfile, err := lt.children[0].deriveStats() + if err != nil { + return nil, errors.Trace(err) + } lt.stats = &statsInfo{ count: math.Min(float64(lt.Count), childProfile.count), cardinality: make([]float64, len(childProfile.cardinality)), @@ -168,7 +195,7 @@ func (lt *LogicalTopN) deriveStats() *statsInfo { for i := range lt.stats.cardinality { lt.stats.cardinality[i] = math.Min(childProfile.cardinality[i], lt.stats.count) } - return lt.stats + return lt.stats, nil } // getCardinality will return the cardinality of a couple of columns. We simply return the max one, because we cannot know @@ -187,8 +214,11 @@ func getCardinality(cols []*expression.Column, schema *expression.Schema, profil return cardinality } -func (p *LogicalProjection) deriveStats() *statsInfo { - childProfile := p.children[0].deriveStats() +func (p *LogicalProjection) deriveStats() (*statsInfo, error) { + childProfile, err := p.children[0].deriveStats() + if err != nil { + return nil, errors.Trace(err) + } p.stats = &statsInfo{ count: childProfile.count, cardinality: make([]float64, len(p.Exprs)), @@ -197,11 +227,14 @@ func (p *LogicalProjection) deriveStats() *statsInfo { cols := expression.ExtractColumns(expr) p.stats.cardinality[i] = getCardinality(cols, p.children[0].Schema(), childProfile) } - return p.stats + return p.stats, nil } -func (la *LogicalAggregation) deriveStats() *statsInfo { - childProfile := la.children[0].deriveStats() +func (la *LogicalAggregation) deriveStats() (*statsInfo, error) { + childProfile, err := la.children[0].deriveStats() + if err != nil { + return nil, errors.Trace(err) + } gbyCols := make([]*expression.Column, 0, len(la.GroupByItems)) for _, gbyExpr := range la.GroupByItems { cols := expression.ExtractColumns(gbyExpr) @@ -217,7 +250,7 @@ func (la *LogicalAggregation) deriveStats() *statsInfo { la.stats.cardinality[i] = cardinality } la.inputCount = childProfile.count - return la.stats + return la.stats, nil } // deriveStats prepares statsInfo. @@ -227,9 +260,15 @@ func (la *LogicalAggregation) deriveStats() *statsInfo { // N(s) stands for the number of rows in relation s. V(s.key) means the cardinality of join key in s. // This is a quite simple strategy: We assume every bucket of relation which will participate join has the same number of rows, and apply cross join for // every matched bucket. -func (p *LogicalJoin) deriveStats() *statsInfo { - leftProfile := p.children[0].deriveStats() - rightProfile := p.children[1].deriveStats() +func (p *LogicalJoin) deriveStats() (*statsInfo, error) { + leftProfile, err := p.children[0].deriveStats() + if err != nil { + return nil, errors.Trace(err) + } + rightProfile, err := p.children[1].deriveStats() + if err != nil { + return nil, errors.Trace(err) + } if p.JoinType == SemiJoin || p.JoinType == AntiSemiJoin { p.stats = &statsInfo{ count: leftProfile.count * selectionFactor, @@ -238,7 +277,7 @@ func (p *LogicalJoin) deriveStats() *statsInfo { for i := range p.stats.cardinality { p.stats.cardinality[i] = leftProfile.cardinality[i] * selectionFactor } - return p.stats + return p.stats, nil } if p.JoinType == LeftOuterSemiJoin || p.JoinType == AntiLeftOuterSemiJoin { p.stats = &statsInfo{ @@ -247,14 +286,14 @@ func (p *LogicalJoin) deriveStats() *statsInfo { } copy(p.stats.cardinality, leftProfile.cardinality) p.stats.cardinality[len(p.stats.cardinality)-1] = 2.0 - return p.stats + return p.stats, nil } if 0 == len(p.EqualConditions) { p.stats = &statsInfo{ count: leftProfile.count * rightProfile.count, cardinality: append(leftProfile.cardinality, rightProfile.cardinality...), } - return p.stats + return p.stats, nil } leftKeys := make([]*expression.Column, 0, len(p.EqualConditions)) rightKeys := make([]*expression.Column, 0, len(p.EqualConditions)) @@ -280,12 +319,18 @@ func (p *LogicalJoin) deriveStats() *statsInfo { count: count, cardinality: cardinality, } - return p.stats + return p.stats, nil } -func (la *LogicalApply) deriveStats() *statsInfo { - leftProfile := la.children[0].deriveStats() - _ = la.children[1].deriveStats() +func (la *LogicalApply) deriveStats() (*statsInfo, error) { + leftProfile, err := la.children[0].deriveStats() + if err != nil { + return nil, errors.Trace(err) + } + _, err = la.children[1].deriveStats() + if err != nil { + return nil, errors.Trace(err) + } la.stats = &statsInfo{ count: leftProfile.count, cardinality: make([]float64, la.schema.Len()), @@ -298,7 +343,7 @@ func (la *LogicalApply) deriveStats() *statsInfo { la.stats.cardinality[i] = leftProfile.count } } - return la.stats + return la.stats, nil } // Exists and MaxOneRow produce at most one row, so we set the count of stats one. @@ -313,14 +358,20 @@ func getSingletonStats(len int) *statsInfo { return ret } -func (p *LogicalExists) deriveStats() *statsInfo { - p.children[0].deriveStats() +func (p *LogicalExists) deriveStats() (*statsInfo, error) { + _, err := p.children[0].deriveStats() + if err != nil { + return nil, errors.Trace(err) + } p.stats = getSingletonStats(1) - return p.stats + return p.stats, nil } -func (p *LogicalMaxOneRow) deriveStats() *statsInfo { - p.children[0].deriveStats() +func (p *LogicalMaxOneRow) deriveStats() (*statsInfo, error) { + _, err := p.children[0].deriveStats() + if err != nil { + return nil, errors.Trace(err) + } p.stats = getSingletonStats(p.Schema().Len()) - return p.stats + return p.stats, nil }