From 391e38958762a9ed4a098055a79ed8f0a1d2abc6 Mon Sep 17 00:00:00 2001 From: Yiding Cui Date: Mon, 23 Apr 2018 17:55:18 +0800 Subject: [PATCH 1/8] plan: calc `index path` when doing `deriveStats`. --- plan/build_key_info.go | 7 +- plan/gen_physical_plans.go | 9 +- plan/logical_plan_builder.go | 14 +-- plan/logical_plans.go | 21 +++- plan/optimizer.go | 5 +- plan/physical_plan_builder.go | 146 +++++++++----------------- plan/plan.go | 2 +- plan/planbuilder.go | 62 ++++++----- plan/property_cols_prune.go | 42 +++----- plan/stats.go | 190 ++++++++++++++++++++++++++-------- types/datum.go | 5 + 11 files changed, 289 insertions(+), 214 deletions(-) diff --git a/plan/build_key_info.go b/plan/build_key_info.go index 639f2230df5c0..ac548803b8312 100644 --- a/plan/build_key_info.go +++ b/plan/build_key_info.go @@ -174,8 +174,11 @@ func (p *LogicalJoin) buildKeyInfo() { func (ds *DataSource) buildKeyInfo() { ds.schema.Keys = nil ds.baseLogicalPlan.buildKeyInfo() - indices := ds.availableIndices.indices - for _, idx := range indices { + for _, path := range ds.possibleIndexPaths { + if path.isRowID { + continue + } + idx := path.index if !idx.Unique { continue } diff --git a/plan/gen_physical_plans.go b/plan/gen_physical_plans.go index 371f3485bce0a..89c43e86ac9bf 100644 --- a/plan/gen_physical_plans.go +++ b/plan/gen_physical_plans.go @@ -256,9 +256,9 @@ func (p *LogicalJoin) getIndexJoinByOuterIdx(prop *requiredProp, outerIdx int) [ if !ok { return nil } - indices := x.availableIndices.indices - includeTableScan := x.availableIndices.includeTableScan - if includeTableScan && len(innerJoinKeys) == 1 { + indexPaths := x.possibleIndexPaths + if len(x.possibleIndexPaths) > 0 && x.possibleIndexPaths[0].isRowID { + indexPaths = indexPaths[1:] pkCol := x.getPKIsHandleCol() if pkCol != nil && innerJoinKeys[0].Equal(nil, pkCol) { innerPlan := x.forceToTableScan(pkCol) @@ -272,7 +272,8 @@ func (p *LogicalJoin) getIndexJoinByOuterIdx(prop *requiredProp, outerIdx int) [ remainedOfBest []expression.Expression keyOff2IdxOff []int ) - for _, indexInfo := range indices { + for _, path := range indexPaths { + indexInfo := path.index ranges, remained, tmpKeyOff2IdxOff := p.buildRangeForIndexJoin(indexInfo, x, innerJoinKeys) // We choose the index by the number of used columns of the range, the much the better. // Notice that there may be the cases like `t1.a=t2.a and b > 2 and b < 1`. So ranges can be nil though the conditions are valid. diff --git a/plan/logical_plan_builder.go b/plan/logical_plan_builder.go index 3d4be5dc6c067..a3b178e2887ac 100644 --- a/plan/logical_plan_builder.go +++ b/plan/logical_plan_builder.go @@ -1671,7 +1671,7 @@ func (b *planBuilder) buildDataSource(tn *ast.TableName) LogicalPlan { tableInfo := tbl.Meta() b.visitInfo = appendVisitInfo(b.visitInfo, mysql.SelectPriv, dbName.L, tableInfo.Name.L, "") - availableIdxes, err := getAvailableIndices(tn.IndexHints, tableInfo) + possiblePaths, err := getPossibleIndexPaths(tn.IndexHints, tableInfo) if err != nil { b.err = errors.Trace(err) return nil @@ -1685,12 +1685,12 @@ func (b *planBuilder) buildDataSource(tn *ast.TableName) LogicalPlan { } ds := DataSource{ - DBName: dbName, - tableInfo: tableInfo, - statisticTable: b.getStatsTable(tableInfo), - indexHints: tn.IndexHints, - availableIndices: availableIdxes, - Columns: make([]*model.ColumnInfo, 0, len(columns)), + DBName: dbName, + tableInfo: tableInfo, + statisticTable: b.getStatsTable(tableInfo), + indexHints: tn.IndexHints, + possibleIndexPaths: possiblePaths, + Columns: make([]*model.ColumnInfo, 0, len(columns)), }.init(b.ctx) var handleCol *expression.Column diff --git a/plan/logical_plans.go b/plan/logical_plans.go index c16af57be4801..9562228a201d8 100644 --- a/plan/logical_plans.go +++ b/plan/logical_plans.go @@ -21,6 +21,7 @@ import ( "github.com/pingcap/tidb/mysql" "github.com/pingcap/tidb/statistics" "github.com/pingcap/tidb/types" + "github.com/pingcap/tidb/util/ranger" ) var ( @@ -299,13 +300,23 @@ type DataSource struct { statisticTable *statistics.Table - // availableIndices is used for storing result of availableIndices function. - availableIndices *availableIndices + // possibleIndexPaths stores all the possible index path for physical plan, including table scan. + // Please make true table scan is always the first element. + possibleIndexPaths []*indexPath } -type availableIndices struct { - indices []*model.IndexInfo - includeTableScan bool +type indexPath struct { + index *model.IndexInfo + ranges []*ranger.NewRange + countAfterAccess float64 + countAfterIndex float64 + accessConds []expression.Expression + eqCondCount int + indexFilters []expression.Expression + tableFilters []expression.Expression + filterUnmatched bool + isRowID bool + forced bool } func (ds *DataSource) getPKIsHandleCol() *expression.Column { diff --git a/plan/optimizer.go b/plan/optimizer.go index 47e0de2a08035..9c7c01ea7f390 100644 --- a/plan/optimizer.go +++ b/plan/optimizer.go @@ -147,7 +147,10 @@ func logicalOptimize(flag uint64, logic LogicalPlan) (LogicalPlan, error) { func physicalOptimize(logic LogicalPlan) (PhysicalPlan, error) { logic.preparePossibleProperties() - logic.deriveStats() + _, err := logic.deriveStats() + if err != nil { + return nil, errors.Trace(err) + } t, err := logic.findBestTask(&requiredProp{taskTp: rootTaskType, expectedCnt: math.MaxFloat64}) if err != nil { return nil, errors.Trace(err) diff --git a/plan/physical_plan_builder.go b/plan/physical_plan_builder.go index ba3167f4a2380..fdb4db57aebd1 100644 --- a/plan/physical_plan_builder.go +++ b/plan/physical_plan_builder.go @@ -24,7 +24,6 @@ import ( "github.com/pingcap/tidb/mysql" "github.com/pingcap/tidb/types" "github.com/pingcap/tidb/util/ranger" - log "github.com/sirupsen/logrus" ) const ( @@ -202,30 +201,30 @@ func (ds *DataSource) findBestTask(prop *requiredProp) (task, error) { return t, nil } - indices := ds.availableIndices.indices - includeTableScan := ds.availableIndices.includeTableScan t = invalidTask - if includeTableScan { - t, err = ds.convertToTableScan(prop) - if err != nil { - return nil, errors.Trace(err) - } - } - if !includeTableScan || len(ds.pushedDownConds) > 0 || len(prop.cols) > 0 { - for i, idx := range indices { - // TODO: We can also check if the prop matches the index columns. - if !ds.relevantIndices[i] && len(prop.cols) == 0 { - continue - } - idxTask, err := ds.convertToIndexScan(prop, idx) + + for _, path := range ds.possibleIndexPaths { + if path.isRowID { + tblTask, err := ds.convertToTableScan(prop, path) if err != nil { return nil, errors.Trace(err) } - if idxTask.cost() < t.cost() { - t = idxTask + if tblTask.cost() < t.cost() { + t = tblTask + } + } else { + if len(path.accessConds) > 0 || len(prop.cols) > 0 || path.forced { + idxTask, err := ds.convertToIndexScan(prop, path) + if err != nil { + return nil, errors.Trace(err) + } + if idxTask.cost() < t.cost() { + t = idxTask + } } } } + ds.storeTask(prop, t) return t, nil } @@ -276,7 +275,9 @@ func (ds *DataSource) forceToIndexScan(idx *model.IndexInfo, remainedConds []exp cop.tablePlan = ts } is.initSchema(ds.id, idx, cop.tablePlan != nil) - is.addPushedDownSelection(cop, ds, math.MaxFloat64) + indexConds, tblConds := splitIndexFilterConditions(remainedConds, idx.Columns, ds.tableInfo) + path := &indexPath{indexFilters: indexConds, tableFilters: tblConds, countAfterIndex: math.MaxFloat64} + is.addPushedDownSelection(cop, ds, math.MaxFloat64, path) t := finishCopTask(ds.ctx, cop) return t.plan() } @@ -294,7 +295,8 @@ func (ts *PhysicalTableScan) appendExtraHandleCol(ds *DataSource) { } // convertToIndexScan converts the DataSource to index scan with idx. -func (ds *DataSource) convertToIndexScan(prop *requiredProp, idx *model.IndexInfo) (task task, err error) { +func (ds *DataSource) convertToIndexScan(prop *requiredProp, path *indexPath) (task task, err error) { + idx := path.index is := PhysicalIndexScan{ Table: ds.tableInfo, TableAsName: ds.TableAsName, @@ -308,25 +310,10 @@ func (ds *DataSource) convertToIndexScan(prop *requiredProp, idx *model.IndexInf is.Hist = &statsTbl.Indices[idx.ID].Histogram } rowCount := float64(statsTbl.Count) - sc := ds.ctx.GetSessionVars().StmtCtx - idxCols, colLengths := expression.IndexInfo2Cols(ds.Schema().Columns, idx) is.Ranges = ranger.FullNewRange() eqCount := 0 - if len(ds.pushedDownConds) > 0 { - is.conditions = ds.pushedDownConds - if len(idxCols) > 0 { - is.Ranges, is.AccessCondition, is.filterCondition, eqCount, err = ranger.DetachCondAndBuildRangeForIndex(ds.ctx, ds.pushedDownConds, idxCols, colLengths) - if err != nil { - return nil, errors.Trace(err) - } - rowCount, err = statsTbl.GetRowCountByIndexRanges(sc, is.Index.ID, is.Ranges) - if err != nil { - return nil, errors.Trace(err) - } - } else { - is.filterCondition = ds.pushedDownConds - } - } + is.AccessCondition, is.Ranges, is.filterCondition, eqCount = path.accessConds, path.ranges, path.indexFilters, path.eqCondCount + rowCount = path.countAfterAccess cop := &copTask{indexPlan: is} if !isCoveringIndex(is.Columns, is.Index.Columns, is.Table.PKIsHandle) { // On this way, it's double read case. @@ -358,12 +345,8 @@ func (ds *DataSource) convertToIndexScan(prop *requiredProp, idx *model.IndexInf // Only use expectedCnt when it's smaller than the count we calculated. // e.g. IndexScan(count1)->After Filter(count2). The `ds.statsAfterSelect.count` is count2. count1 is the one we need to calculate // If expectedCnt and count2 are both zero and we go into the below `if` block, the count1 will be set to zero though it's shouldn't be. - if matchProperty && prop.expectedCnt < ds.statsAfterSelect.count { - selectivity, err := statsTbl.Selectivity(ds.ctx, is.filterCondition) - if err != nil { - log.Warnf("An error happened: %v, we have to use the default selectivity", err.Error()) - selectivity = selectionFactor - } + if matchProperty && prop.expectedCnt < path.countAfterIndex { + selectivity := path.countAfterIndex / path.countAfterAccess rowCount = math.Min(prop.expectedCnt/selectivity, rowCount) } is.stats = ds.stats.scaleByExpectCnt(rowCount) @@ -379,7 +362,7 @@ func (ds *DataSource) convertToIndexScan(prop *requiredProp, idx *model.IndexInf } cop.keepOrder = true is.KeepOrder = true - is.addPushedDownSelection(cop, ds, prop.expectedCnt) + is.addPushedDownSelection(cop, ds, prop.expectedCnt, path) } else { expectedCnt := math.MaxFloat64 if prop.isEmpty() { @@ -387,7 +370,7 @@ func (ds *DataSource) convertToIndexScan(prop *requiredProp, idx *model.IndexInf } else { return invalidTask, nil } - is.addPushedDownSelection(cop, ds, expectedCnt) + is.addPushedDownSelection(cop, ds, expectedCnt, path) } if prop.taskTp == rootTaskType { task = finishCopTask(ds.ctx, task) @@ -418,29 +401,23 @@ func (is *PhysicalIndexScan) initSchema(id int, idx *model.IndexInfo, isDoubleRe is.SetSchema(expression.NewSchema(indexCols...)) } -func (is *PhysicalIndexScan) addPushedDownSelection(copTask *copTask, p *DataSource, expectedCnt float64) { +func (is *PhysicalIndexScan) addPushedDownSelection(copTask *copTask, p *DataSource, expectedCnt float64, path *indexPath) { // Add filter condition to table plan now. - if len(is.filterCondition) > 0 { - var indexConds, tableConds []expression.Expression - if copTask.tablePlan != nil { - indexConds, tableConds = splitIndexFilterConditions(is.filterCondition, is.Index.Columns, is.Table) - } else { - indexConds = is.filterCondition - } - if indexConds != nil { - indexSel := PhysicalSelection{Conditions: indexConds}.init(is.ctx, - p.getStatsByFilter(append(is.AccessCondition, indexConds...)).scaleByExpectCnt(expectedCnt)) - indexSel.SetChildren(is) - copTask.indexPlan = indexSel - copTask.cst += copTask.count() * cpuFactor - } - if tableConds != nil { - copTask.finishIndexPlan() - tableSel := PhysicalSelection{Conditions: tableConds}.init(is.ctx, p.statsAfterSelect.scaleByExpectCnt(expectedCnt)) - tableSel.SetChildren(copTask.tablePlan) - copTask.tablePlan = tableSel - copTask.cst += copTask.count() * cpuFactor - } + indexConds, tableConds := path.indexFilters, path.tableFilters + if indexConds != nil { + stats := &statsInfo{count: path.countAfterIndex} + indexSel := PhysicalSelection{Conditions: indexConds}.init(is.ctx, + stats.scaleByExpectCnt(expectedCnt)) + indexSel.SetChildren(is) + copTask.indexPlan = indexSel + copTask.cst += copTask.count() * cpuFactor + } + if tableConds != nil { + copTask.finishIndexPlan() + tableSel := PhysicalSelection{Conditions: tableConds}.init(is.ctx, p.statsAfterSelect.scaleByExpectCnt(expectedCnt)) + tableSel.SetChildren(copTask.tablePlan) + copTask.tablePlan = tableSel + copTask.cst += copTask.count() * cpuFactor } } @@ -524,7 +501,7 @@ func (ds *DataSource) forceToTableScan(pk *expression.Column) PhysicalPlan { } // convertToTableScan converts the DataSource to table scan. -func (ds *DataSource) convertToTableScan(prop *requiredProp) (task task, err error) { +func (ds *DataSource) convertToTableScan(prop *requiredProp, path *indexPath) (task task, err error) { // It will be handled in convertToIndexScan. if prop.taskTp == copDoubleReadTaskType { return &copTask{cst: math.MaxFloat64}, nil @@ -537,7 +514,6 @@ func (ds *DataSource) convertToTableScan(prop *requiredProp) (task task, err err DBName: ds.DBName, }.init(ds.ctx) ts.SetSchema(ds.schema) - sc := ds.ctx.GetSessionVars().StmtCtx var pkCol *expression.Column if ts.Table.PKIsHandle { if pkColInfo := ts.Table.GetPkColInfo(); pkColInfo != nil { @@ -547,29 +523,9 @@ func (ds *DataSource) convertToTableScan(prop *requiredProp) (task task, err err } } } - if pkCol != nil { - ts.Ranges = ranger.FullIntNewRange(mysql.HasUnsignedFlag(pkCol.RetType.Flag)) - } else { - ts.Ranges = ranger.FullIntNewRange(false) - } - statsTbl := ds.statisticTable - rowCount := float64(statsTbl.Count) - if len(ds.pushedDownConds) > 0 { - if pkCol != nil { - ts.AccessCondition, ts.filterCondition = ranger.DetachCondsForTableRange(ds.ctx, ds.pushedDownConds, pkCol) - ts.Ranges, err = ranger.BuildTableRange(ts.AccessCondition, sc, pkCol.RetType) - if err != nil { - return nil, errors.Trace(err) - } - // TODO: We can use ds.getStatsByFilter(accessConditions). - rowCount, err = statsTbl.GetRowCountByIntColumnRanges(sc, pkCol.ID, ts.Ranges) - if err != nil { - return nil, errors.Trace(err) - } - } else { - ts.filterCondition = ds.pushedDownConds - } - } + ts.Ranges = path.ranges + ts.AccessCondition, ts.filterCondition = path.accessConds, path.tableFilters + rowCount := path.countAfterAccess copTask := &copTask{ tablePlan: ts, indexPlanFinished: true, @@ -580,11 +536,7 @@ func (ds *DataSource) convertToTableScan(prop *requiredProp) (task task, err err // e.g. IndexScan(count1)->After Filter(count2). The `ds.statsAfterSelect.count` is count2. count1 is the one we need to calculate // If expectedCnt and count2 are both zero and we go into the below `if` block, the count1 will be set to zero though it's shouldn't be. if matchProperty && prop.expectedCnt < ds.statsAfterSelect.count { - selectivity, err := statsTbl.Selectivity(ds.ctx, ts.filterCondition) - if err != nil { - log.Warnf("An error happened: %v, we have to use the default selectivity", err.Error()) - selectivity = selectionFactor - } + selectivity := ds.statsAfterSelect.count / rowCount rowCount = math.Min(prop.expectedCnt/selectivity, rowCount) } ts.stats = ds.stats.scaleByExpectCnt(rowCount) diff --git a/plan/plan.go b/plan/plan.go index 5296b44c35b81..84344be0f5417 100644 --- a/plan/plan.go +++ b/plan/plan.go @@ -161,7 +161,7 @@ type LogicalPlan interface { pushDownTopN(topN *LogicalTopN) LogicalPlan // deriveStats derives statistic info between plans. - deriveStats() *statsInfo + deriveStats() (*statsInfo, error) // preparePossibleProperties is only used for join and aggregation. Like group by a,b,c, all permutation of (a,b,c) is // valid, but the ordered indices in leaf plan is limited. So we can get all possible order properties by a pre-walking. diff --git a/plan/planbuilder.go b/plan/planbuilder.go index b9340b5d14abf..1eb9a52fdb1fb 100644 --- a/plan/planbuilder.go +++ b/plan/planbuilder.go @@ -317,17 +317,27 @@ func (b *planBuilder) detectSelectAgg(sel *ast.SelectStmt) bool { return false } -func getAvailableIndices(indexHints []*ast.IndexHint, tableInfo *model.TableInfo) (*availableIndices, error) { - publicIndices := make([]*model.IndexInfo, 0, len(tableInfo.Indices)) - for _, index := range tableInfo.Indices { +func matchPathByIndexName(paths []*indexPath, idxName model.CIStr) *indexPath { + for _, path := range paths { + if path.index.Name.L == idxName.L { + return path + } + } + return nil +} + +func getPossibleIndexPaths(indexHints []*ast.IndexHint, tblInfo *model.TableInfo) ([]*indexPath, error) { + publicPaths := make([]*indexPath, 0, len(tblInfo.Indices)+1) + publicPaths = append(publicPaths, &indexPath{isRowID: true}) + for _, index := range tblInfo.Indices { if index.State == model.StatePublic { - publicIndices = append(publicIndices, index) + publicPaths = append(publicPaths, &indexPath{index: index}) } } hasScanHint, hasUseOrForce := false, false - available := make([]*model.IndexInfo, 0, len(indexHints)) - ignored := make([]*model.IndexInfo, 0, len(indexHints)) + available := make([]*indexPath, 0, len(publicPaths)) + ignored := make([]*indexPath, 0, len(publicPaths)) for _, hint := range indexHints { if hint.HintScope != ast.HintForScan { continue @@ -335,47 +345,51 @@ func getAvailableIndices(indexHints []*ast.IndexHint, tableInfo *model.TableInfo hasScanHint = true for _, idxName := range hint.IndexNames { - idx := findIndexByName(publicIndices, idxName) - if idx == nil { - return nil, ErrKeyDoesNotExist.GenByArgs(idxName, tableInfo.Name) + path := matchPathByIndexName(publicPaths[1:], idxName) + if path == nil { + return nil, ErrKeyDoesNotExist.GenByArgs(idxName, tblInfo.Name) } if hint.HintType == ast.HintIgnore { // Collect all the ignored index hints. - ignored = append(ignored, idx) + ignored = append(ignored, path) continue } // Currently we don't distinguish between "FORCE" and "USE" because // our cost estimation is not reliable. hasUseOrForce = true - available = append(available, idx) + path.forced = true + available = append(available, path) } } if !hasScanHint { - return &availableIndices{publicIndices, true}, nil + return publicPaths, nil } if !hasUseOrForce { - available = removeIgnoredIndices(publicIndices, ignored) - return &availableIndices{available, true}, nil + return removeIgnoredPaths(publicPaths, ignored), nil } - available = removeIgnoredIndices(available, ignored) + available = removeIgnoredPaths(available, ignored) + // If we have got "FORCE" or "USE" index hint but got no available index, // we have to use table scan. - return &availableIndices{available, len(available) == 0}, nil + if len(available) == 0 { + available = append(available, &indexPath{isRowID: true}) + } + return available, nil } -func removeIgnoredIndices(indices, ignores []*model.IndexInfo) []*model.IndexInfo { - if len(ignores) == 0 { - return indices +func removeIgnoredPaths(paths, ignoredPaths []*indexPath) []*indexPath { + if len(ignoredPaths) == 0 { + return paths } - var remainedIndices []*model.IndexInfo - for _, index := range indices { - if findIndexByName(ignores, index.Name) == nil { - remainedIndices = append(remainedIndices, index) + remainedPaths := make([]*indexPath, 0, len(paths)) + for _, path := range paths { + if path.isRowID || matchPathByIndexName(ignoredPaths, path.index.Name) == nil { + remainedPaths = append(remainedPaths, path) } } - return remainedIndices + return remainedPaths } func findIndexByName(indices []*model.IndexInfo, name model.CIStr) *model.IndexInfo { diff --git a/plan/property_cols_prune.go b/plan/property_cols_prune.go index 5449ed87ae3c4..d54534f2589e5 100644 --- a/plan/property_cols_prune.go +++ b/plan/property_cols_prune.go @@ -18,37 +18,19 @@ import ( ) func (ds *DataSource) preparePossibleProperties() [][]*expression.Column { - indices := ds.availableIndices.indices - includeTS := ds.availableIndices.includeTableScan + result := make([][]*expression.Column, 0, len(ds.possibleIndexPaths)) - result := make([][]*expression.Column, 0, len(indices)) - ds.relevantIndices = make([]bool, len(indices)) - - if includeTS { - col := ds.getPKIsHandleCol() - if col != nil { - result = append(result, []*expression.Column{col}) - } - - cols := expression.ExtractColumnsFromExpressions(make([]*expression.Column, 0, 10), ds.pushedDownConds, nil) - colsSet := make(map[string]struct{}, len(cols)) - for _, col := range cols { - colsSet[col.ColName.L] = struct{}{} - } - - for i, idx := range indices { - _, ok := colsSet[idx.Columns[0].Name.L] - ds.relevantIndices[i] = ok - } - } else { - for i := range ds.relevantIndices { - ds.relevantIndices[i] = true - } - } - for _, idx := range indices { - cols, _ := expression.IndexInfo2Cols(ds.schema.Columns, idx) - if len(cols) > 0 { - result = append(result, cols) + for _, path := range ds.possibleIndexPaths { + if path.isRowID { + col := ds.getPKIsHandleCol() + if col != nil { + result = append(result, []*expression.Column{col}) + } + } else { + cols, _ := expression.IndexInfo2Cols(ds.schema.Columns, path.index) + if len(cols) > 0 { + result = append(result, cols) + } } } return result diff --git a/plan/stats.go b/plan/stats.go index b1d7ea7081264..53fc332c16239 100644 --- a/plan/stats.go +++ b/plan/stats.go @@ -17,7 +17,10 @@ import ( "fmt" "math" + "github.com/juju/errors" "github.com/pingcap/tidb/expression" + "github.com/pingcap/tidb/mysql" + "github.com/pingcap/tidb/util/ranger" log "github.com/sirupsen/logrus" ) @@ -62,7 +65,7 @@ func (p *basePhysicalPlan) StatsInfo() *statsInfo { return p.stats } -func (p *LogicalTableDual) deriveStats() *statsInfo { +func (p *LogicalTableDual) deriveStats() (*statsInfo, error) { profile := &statsInfo{ count: float64(p.RowCount), cardinality: make([]float64, p.Schema().Len()), @@ -71,17 +74,21 @@ func (p *LogicalTableDual) deriveStats() *statsInfo { profile.cardinality[i] = float64(p.RowCount) } p.stats = profile - return p.stats + return p.stats, nil } -func (p *baseLogicalPlan) deriveStats() *statsInfo { +func (p *baseLogicalPlan) deriveStats() (*statsInfo, error) { if len(p.children) > 1 { panic("LogicalPlans with more than one child should implement their own deriveStats().") } if len(p.children) == 1 { - p.stats = p.children[0].deriveStats() - return p.stats + var err error + p.stats, err = p.children[0].deriveStats() + if err != nil { + return nil, errors.Trace(err) + } + return p.stats, nil } profile := &statsInfo{ @@ -92,7 +99,7 @@ func (p *baseLogicalPlan) deriveStats() *statsInfo { profile.cardinality[i] = float64(1) } p.stats = profile - return profile + return profile, nil } func (ds *DataSource) getStatsByFilter(conds expression.CNFExprs) *statsInfo { @@ -118,37 +125,107 @@ func (ds *DataSource) getStatsByFilter(conds expression.CNFExprs) *statsInfo { return profile.scale(selectivity) } -func (ds *DataSource) deriveStats() *statsInfo { +func (ds *DataSource) deriveStats() (*statsInfo, error) { // PushDownNot here can convert query 'not (a != 1)' to 'a = 1'. for i, expr := range ds.pushedDownConds { ds.pushedDownConds[i] = expression.PushDownNot(nil, expr, false) } ds.statsAfterSelect = ds.getStatsByFilter(ds.pushedDownConds) - return ds.statsAfterSelect + sc := ds.ctx.GetSessionVars().StmtCtx + for _, path := range ds.possibleIndexPaths { + path.countAfterAccess = float64(ds.statisticTable.Count) + path.countAfterIndex = float64(ds.statisticTable.Count) + var err error + if path.isRowID { + var pkCol *expression.Column + if ds.tableInfo.PKIsHandle { + if pkColInfo := ds.tableInfo.GetPkColInfo(); pkColInfo != nil { + pkCol = expression.ColInfo2Col(ds.schema.Columns, pkColInfo) + } + } + if pkCol != nil { + path.ranges = ranger.FullIntNewRange(mysql.HasUnsignedFlag(pkCol.RetType.Flag)) + } else { + path.ranges = ranger.FullIntNewRange(false) + } + if len(ds.pushedDownConds) > 0 { + if pkCol != nil { + path.accessConds, path.tableFilters = ranger.DetachCondsForTableRange(ds.ctx, ds.pushedDownConds, pkCol) + path.ranges, err = ranger.BuildTableRange(path.accessConds, sc, pkCol.RetType) + if err != nil { + return nil, errors.Trace(err) + } + path.countAfterAccess, err = ds.statisticTable.GetRowCountByIntColumnRanges(sc, pkCol.ID, path.ranges) + if err != nil { + return nil, errors.Trace(err) + } + } else { + path.tableFilters = ds.pushedDownConds + } + } + continue + } + path.ranges = ranger.FullNewRange() + if len(ds.pushedDownConds) > 0 { + idxCols, lengths := expression.IndexInfo2Cols(ds.schema.Columns, path.index) + if len(idxCols) != 0 { + path.ranges, path.accessConds, path.indexFilters, path.eqCondCount, err = ranger.DetachCondAndBuildRangeForIndex(ds.ctx, ds.pushedDownConds, idxCols, lengths) + if err != nil { + return nil, errors.Trace(err) + } + path.countAfterAccess, err = ds.statisticTable.GetRowCountByIndexRanges(sc, path.index.ID, path.ranges) + if err != nil { + return nil, errors.Trace(err) + } + path.indexFilters, path.tableFilters = splitIndexFilterConditions(path.indexFilters, path.index.Columns, ds.tableInfo) + } else { + path.indexFilters, path.tableFilters = splitIndexFilterConditions(ds.pushedDownConds, path.index.Columns, ds.tableInfo) + } + path.countAfterIndex = path.countAfterAccess + if path.indexFilters != nil { + selectivity, err := ds.statisticTable.Selectivity(ds.ctx, path.indexFilters) + if err != nil { + log.Warnf("An error happened: %v, we have to use the default selectivity", err.Error()) + selectivity = selectionFactor + } + path.countAfterIndex = math.Max(path.countAfterAccess*selectivity, ds.statsAfterSelect.count) + } + } + } + return ds.statsAfterSelect, nil } -func (p *LogicalSelection) deriveStats() *statsInfo { - childProfile := p.children[0].deriveStats() +func (p *LogicalSelection) deriveStats() (*statsInfo, error) { + childProfile, err := p.children[0].deriveStats() + if err != nil { + return nil, errors.Trace(err) + } p.stats = childProfile.scale(selectionFactor) - return p.stats + return p.stats, nil } -func (p *LogicalUnionAll) deriveStats() *statsInfo { +func (p *LogicalUnionAll) deriveStats() (*statsInfo, error) { p.stats = &statsInfo{ cardinality: make([]float64, p.Schema().Len()), } for _, child := range p.children { - childProfile := child.deriveStats() + childProfile, err := child.deriveStats() + if err != nil { + return nil, errors.Trace(err) + } p.stats.count += childProfile.count for i := range p.stats.cardinality { p.stats.cardinality[i] += childProfile.cardinality[i] } } - return p.stats + return p.stats, nil } -func (p *LogicalLimit) deriveStats() *statsInfo { - childProfile := p.children[0].deriveStats() +func (p *LogicalLimit) deriveStats() (*statsInfo, error) { + childProfile, err := p.children[0].deriveStats() + if err != nil { + return nil, errors.Trace(err) + } p.stats = &statsInfo{ count: math.Min(float64(p.Count), childProfile.count), cardinality: make([]float64, len(childProfile.cardinality)), @@ -156,11 +233,14 @@ func (p *LogicalLimit) deriveStats() *statsInfo { for i := range p.stats.cardinality { p.stats.cardinality[i] = math.Min(childProfile.cardinality[i], p.stats.count) } - return p.stats + return p.stats, nil } -func (lt *LogicalTopN) deriveStats() *statsInfo { - childProfile := lt.children[0].deriveStats() +func (lt *LogicalTopN) deriveStats() (*statsInfo, error) { + childProfile, err := lt.children[0].deriveStats() + if err != nil { + return nil, errors.Trace(err) + } lt.stats = &statsInfo{ count: math.Min(float64(lt.Count), childProfile.count), cardinality: make([]float64, len(childProfile.cardinality)), @@ -168,7 +248,7 @@ func (lt *LogicalTopN) deriveStats() *statsInfo { for i := range lt.stats.cardinality { lt.stats.cardinality[i] = math.Min(childProfile.cardinality[i], lt.stats.count) } - return lt.stats + return lt.stats, nil } // getCardinality will return the cardinality of a couple of columns. We simply return the max one, because we cannot know @@ -187,8 +267,11 @@ func getCardinality(cols []*expression.Column, schema *expression.Schema, profil return cardinality } -func (p *LogicalProjection) deriveStats() *statsInfo { - childProfile := p.children[0].deriveStats() +func (p *LogicalProjection) deriveStats() (*statsInfo, error) { + childProfile, err := p.children[0].deriveStats() + if err != nil { + return nil, errors.Trace(err) + } p.stats = &statsInfo{ count: childProfile.count, cardinality: make([]float64, len(p.Exprs)), @@ -197,11 +280,14 @@ func (p *LogicalProjection) deriveStats() *statsInfo { cols := expression.ExtractColumns(expr) p.stats.cardinality[i] = getCardinality(cols, p.children[0].Schema(), childProfile) } - return p.stats + return p.stats, nil } -func (la *LogicalAggregation) deriveStats() *statsInfo { - childProfile := la.children[0].deriveStats() +func (la *LogicalAggregation) deriveStats() (*statsInfo, error) { + childProfile, err := la.children[0].deriveStats() + if err != nil { + return nil, errors.Trace(err) + } gbyCols := make([]*expression.Column, 0, len(la.GroupByItems)) for _, gbyExpr := range la.GroupByItems { cols := expression.ExtractColumns(gbyExpr) @@ -217,7 +303,7 @@ func (la *LogicalAggregation) deriveStats() *statsInfo { la.stats.cardinality[i] = cardinality } la.inputCount = childProfile.count - return la.stats + return la.stats, nil } // deriveStats prepares statsInfo. @@ -227,9 +313,15 @@ func (la *LogicalAggregation) deriveStats() *statsInfo { // N(s) stands for the number of rows in relation s. V(s.key) means the cardinality of join key in s. // This is a quite simple strategy: We assume every bucket of relation which will participate join has the same number of rows, and apply cross join for // every matched bucket. -func (p *LogicalJoin) deriveStats() *statsInfo { - leftProfile := p.children[0].deriveStats() - rightProfile := p.children[1].deriveStats() +func (p *LogicalJoin) deriveStats() (*statsInfo, error) { + leftProfile, err := p.children[0].deriveStats() + if err != nil { + return nil, errors.Trace(err) + } + rightProfile, err := p.children[1].deriveStats() + if err != nil { + return nil, errors.Trace(err) + } if p.JoinType == SemiJoin || p.JoinType == AntiSemiJoin { p.stats = &statsInfo{ count: leftProfile.count * selectionFactor, @@ -238,7 +330,7 @@ func (p *LogicalJoin) deriveStats() *statsInfo { for i := range p.stats.cardinality { p.stats.cardinality[i] = leftProfile.cardinality[i] * selectionFactor } - return p.stats + return p.stats, nil } if p.JoinType == LeftOuterSemiJoin || p.JoinType == AntiLeftOuterSemiJoin { p.stats = &statsInfo{ @@ -247,14 +339,14 @@ func (p *LogicalJoin) deriveStats() *statsInfo { } copy(p.stats.cardinality, leftProfile.cardinality) p.stats.cardinality[len(p.stats.cardinality)-1] = 2.0 - return p.stats + return p.stats, nil } if 0 == len(p.EqualConditions) { p.stats = &statsInfo{ count: leftProfile.count * rightProfile.count, cardinality: append(leftProfile.cardinality, rightProfile.cardinality...), } - return p.stats + return p.stats, nil } leftKeys := make([]*expression.Column, 0, len(p.EqualConditions)) rightKeys := make([]*expression.Column, 0, len(p.EqualConditions)) @@ -280,12 +372,18 @@ func (p *LogicalJoin) deriveStats() *statsInfo { count: count, cardinality: cardinality, } - return p.stats + return p.stats, nil } -func (la *LogicalApply) deriveStats() *statsInfo { - leftProfile := la.children[0].deriveStats() - _ = la.children[1].deriveStats() +func (la *LogicalApply) deriveStats() (*statsInfo, error) { + leftProfile, err := la.children[0].deriveStats() + if err != nil { + return nil, errors.Trace(err) + } + _, err = la.children[1].deriveStats() + if err != nil { + return nil, errors.Trace(err) + } la.stats = &statsInfo{ count: leftProfile.count, cardinality: make([]float64, la.schema.Len()), @@ -298,7 +396,7 @@ func (la *LogicalApply) deriveStats() *statsInfo { la.stats.cardinality[i] = leftProfile.count } } - return la.stats + return la.stats, nil } // Exists and MaxOneRow produce at most one row, so we set the count of stats one. @@ -313,14 +411,20 @@ func getSingletonStats(len int) *statsInfo { return ret } -func (p *LogicalExists) deriveStats() *statsInfo { - p.children[0].deriveStats() +func (p *LogicalExists) deriveStats() (*statsInfo, error) { + _, err := p.children[0].deriveStats() + if err != nil { + return nil, errors.Trace(err) + } p.stats = getSingletonStats(1) - return p.stats + return p.stats, nil } -func (p *LogicalMaxOneRow) deriveStats() *statsInfo { - p.children[0].deriveStats() +func (p *LogicalMaxOneRow) deriveStats() (*statsInfo, error) { + _, err := p.children[0].deriveStats() + if err != nil { + return nil, errors.Trace(err) + } p.stats = getSingletonStats(p.Schema().Len()) - return p.stats + return p.stats, nil } diff --git a/types/datum.go b/types/datum.go index 549c3127cd21d..ff99b1d64f673 100644 --- a/types/datum.go +++ b/types/datum.go @@ -122,6 +122,11 @@ func (d *Datum) IsNull() bool { return d.k == KindNull } +// IsMaxValue checks if datum is max value. +func (d *Datum) IsMaxValue() bool { + return d.k == KindMaxValue +} + // GetInt64 gets int64 value. func (d *Datum) GetInt64() int64 { return d.i From 50fd95905d0a84f6549fbcd3cabdf5bce41e9e77 Mon Sep 17 00:00:00 2001 From: Yiding Cui Date: Tue, 24 Apr 2018 16:06:28 +0800 Subject: [PATCH 2/8] address comments. --- plan/build_key_info.go | 2 +- plan/gen_physical_plans.go | 4 +- plan/logical_plan_builder.go | 14 ++--- plan/logical_plans.go | 97 ++++++++++++++++++++++++++++++----- plan/physical_plan_builder.go | 10 ++-- plan/planbuilder.go | 20 ++++---- plan/property_cols_prune.go | 4 +- plan/stats.go | 63 +++-------------------- types/datum.go | 5 -- 9 files changed, 117 insertions(+), 102 deletions(-) diff --git a/plan/build_key_info.go b/plan/build_key_info.go index ac548803b8312..a67c6f8c5c025 100644 --- a/plan/build_key_info.go +++ b/plan/build_key_info.go @@ -174,7 +174,7 @@ func (p *LogicalJoin) buildKeyInfo() { func (ds *DataSource) buildKeyInfo() { ds.schema.Keys = nil ds.baseLogicalPlan.buildKeyInfo() - for _, path := range ds.possibleIndexPaths { + for _, path := range ds.possibleAccessPaths { if path.isRowID { continue } diff --git a/plan/gen_physical_plans.go b/plan/gen_physical_plans.go index 89c43e86ac9bf..f39c949f5d60c 100644 --- a/plan/gen_physical_plans.go +++ b/plan/gen_physical_plans.go @@ -256,8 +256,8 @@ func (p *LogicalJoin) getIndexJoinByOuterIdx(prop *requiredProp, outerIdx int) [ if !ok { return nil } - indexPaths := x.possibleIndexPaths - if len(x.possibleIndexPaths) > 0 && x.possibleIndexPaths[0].isRowID { + indexPaths := x.possibleAccessPaths + if len(x.possibleAccessPaths) > 0 && x.possibleAccessPaths[0].isRowID { indexPaths = indexPaths[1:] pkCol := x.getPKIsHandleCol() if pkCol != nil && innerJoinKeys[0].Equal(nil, pkCol) { diff --git a/plan/logical_plan_builder.go b/plan/logical_plan_builder.go index a3b178e2887ac..d1f9986cc925c 100644 --- a/plan/logical_plan_builder.go +++ b/plan/logical_plan_builder.go @@ -1671,7 +1671,7 @@ func (b *planBuilder) buildDataSource(tn *ast.TableName) LogicalPlan { tableInfo := tbl.Meta() b.visitInfo = appendVisitInfo(b.visitInfo, mysql.SelectPriv, dbName.L, tableInfo.Name.L, "") - possiblePaths, err := getPossibleIndexPaths(tn.IndexHints, tableInfo) + possiblePaths, err := getPossibleAccessPaths(tn.IndexHints, tableInfo) if err != nil { b.err = errors.Trace(err) return nil @@ -1685,12 +1685,12 @@ func (b *planBuilder) buildDataSource(tn *ast.TableName) LogicalPlan { } ds := DataSource{ - DBName: dbName, - tableInfo: tableInfo, - statisticTable: b.getStatsTable(tableInfo), - indexHints: tn.IndexHints, - possibleIndexPaths: possiblePaths, - Columns: make([]*model.ColumnInfo, 0, len(columns)), + DBName: dbName, + tableInfo: tableInfo, + statisticTable: b.getStatsTable(tableInfo), + indexHints: tn.IndexHints, + possibleAccessPaths: possiblePaths, + Columns: make([]*model.ColumnInfo, 0, len(columns)), }.init(b.ctx) var handleCol *expression.Column diff --git a/plan/logical_plans.go b/plan/logical_plans.go index 9562228a201d8..32728d514778d 100644 --- a/plan/logical_plans.go +++ b/plan/logical_plans.go @@ -14,6 +14,9 @@ package plan import ( + "math" + + "github.com/juju/errors" "github.com/pingcap/tidb/ast" "github.com/pingcap/tidb/expression" "github.com/pingcap/tidb/expression/aggregation" @@ -22,6 +25,7 @@ import ( "github.com/pingcap/tidb/statistics" "github.com/pingcap/tidb/types" "github.com/pingcap/tidb/util/ranger" + log "github.com/sirupsen/logrus" ) var ( @@ -300,23 +304,88 @@ type DataSource struct { statisticTable *statistics.Table - // possibleIndexPaths stores all the possible index path for physical plan, including table scan. - // Please make true table scan is always the first element. - possibleIndexPaths []*indexPath + // possibleAccessPaths stores all the possible index path for physical plan, including table scan. + // Please make sure table scan is always the first element. + possibleAccessPaths []*accessPath } -type indexPath struct { - index *model.IndexInfo - ranges []*ranger.NewRange +// accessPath tells how we access one index or just access table. +type accessPath struct { + index *model.IndexInfo + ranges []*ranger.NewRange + // countAfterAccess is the row count after we apply range seek and before we use other filter to filter data. countAfterAccess float64 - countAfterIndex float64 - accessConds []expression.Expression - eqCondCount int - indexFilters []expression.Expression - tableFilters []expression.Expression - filterUnmatched bool - isRowID bool - forced bool + // countAfterIndex is the row count after we apply filters on index and before we apply the table filters. + countAfterIndex float64 + accessConds []expression.Expression + eqCondCount int + indexFilters []expression.Expression + tableFilters []expression.Expression + // isRowID indicates this path stores the information for table scan. + isRowID bool + // forced means this index is generated by `use/force index()`. + forced bool +} + +func (ds *DataSource) prepareTablePath(path *accessPath) error { + var err error + sc := ds.ctx.GetSessionVars().StmtCtx + path.countAfterAccess = float64(ds.statisticTable.Count) + var pkCol *expression.Column + if pkCol != nil { + path.ranges = ranger.FullIntNewRange(mysql.HasUnsignedFlag(pkCol.RetType.Flag)) + } else { + path.ranges = ranger.FullIntNewRange(false) + } + path.countAfterAccess = float64(ds.statisticTable.Count) + if len(ds.pushedDownConds) > 0 { + if pkCol != nil { + path.accessConds, path.tableFilters = ranger.DetachCondsForTableRange(ds.ctx, ds.pushedDownConds, pkCol) + path.ranges, err = ranger.BuildTableRange(path.accessConds, sc, pkCol.RetType) + if err != nil { + return errors.Trace(err) + } + path.countAfterAccess, err = ds.statisticTable.GetRowCountByIntColumnRanges(sc, pkCol.ID, path.ranges) + if err != nil { + return errors.Trace(err) + } + } else { + path.tableFilters = ds.pushedDownConds + } + } + return nil +} + +func (ds *DataSource) prepareIndexPath(path *accessPath) error { + var err error + sc := ds.ctx.GetSessionVars().StmtCtx + path.ranges = ranger.FullNewRange() + path.countAfterAccess = float64(ds.statisticTable.Count) + path.countAfterIndex = float64(ds.statisticTable.Count) + idxCols, lengths := expression.IndexInfo2Cols(ds.schema.Columns, path.index) + if len(idxCols) != 0 { + path.ranges, path.accessConds, path.indexFilters, path.eqCondCount, err = ranger.DetachCondAndBuildRangeForIndex(ds.ctx, ds.pushedDownConds, idxCols, lengths) + if err != nil { + return errors.Trace(err) + } + path.countAfterAccess, err = ds.statisticTable.GetRowCountByIndexRanges(sc, path.index.ID, path.ranges) + if err != nil { + return errors.Trace(err) + } + path.indexFilters, path.tableFilters = splitIndexFilterConditions(path.indexFilters, path.index.Columns, ds.tableInfo) + } else { + path.indexFilters, path.tableFilters = splitIndexFilterConditions(ds.pushedDownConds, path.index.Columns, ds.tableInfo) + } + path.countAfterIndex = path.countAfterAccess + if path.indexFilters != nil { + selectivity, err := ds.statisticTable.Selectivity(ds.ctx, path.indexFilters) + if err != nil { + log.Warnf("An error happened: %v, we have to use the default selectivity", err.Error()) + selectivity = selectionFactor + } + path.countAfterIndex = math.Max(path.countAfterAccess*selectivity, ds.statsAfterSelect.count) + } + return nil } func (ds *DataSource) getPKIsHandleCol() *expression.Column { diff --git a/plan/physical_plan_builder.go b/plan/physical_plan_builder.go index fdb4db57aebd1..ec3ab863877c7 100644 --- a/plan/physical_plan_builder.go +++ b/plan/physical_plan_builder.go @@ -203,7 +203,7 @@ func (ds *DataSource) findBestTask(prop *requiredProp) (task, error) { t = invalidTask - for _, path := range ds.possibleIndexPaths { + for _, path := range ds.possibleAccessPaths { if path.isRowID { tblTask, err := ds.convertToTableScan(prop, path) if err != nil { @@ -276,7 +276,7 @@ func (ds *DataSource) forceToIndexScan(idx *model.IndexInfo, remainedConds []exp } is.initSchema(ds.id, idx, cop.tablePlan != nil) indexConds, tblConds := splitIndexFilterConditions(remainedConds, idx.Columns, ds.tableInfo) - path := &indexPath{indexFilters: indexConds, tableFilters: tblConds, countAfterIndex: math.MaxFloat64} + path := &accessPath{indexFilters: indexConds, tableFilters: tblConds, countAfterIndex: math.MaxFloat64} is.addPushedDownSelection(cop, ds, math.MaxFloat64, path) t := finishCopTask(ds.ctx, cop) return t.plan() @@ -295,7 +295,7 @@ func (ts *PhysicalTableScan) appendExtraHandleCol(ds *DataSource) { } // convertToIndexScan converts the DataSource to index scan with idx. -func (ds *DataSource) convertToIndexScan(prop *requiredProp, path *indexPath) (task task, err error) { +func (ds *DataSource) convertToIndexScan(prop *requiredProp, path *accessPath) (task task, err error) { idx := path.index is := PhysicalIndexScan{ Table: ds.tableInfo, @@ -401,7 +401,7 @@ func (is *PhysicalIndexScan) initSchema(id int, idx *model.IndexInfo, isDoubleRe is.SetSchema(expression.NewSchema(indexCols...)) } -func (is *PhysicalIndexScan) addPushedDownSelection(copTask *copTask, p *DataSource, expectedCnt float64, path *indexPath) { +func (is *PhysicalIndexScan) addPushedDownSelection(copTask *copTask, p *DataSource, expectedCnt float64, path *accessPath) { // Add filter condition to table plan now. indexConds, tableConds := path.indexFilters, path.tableFilters if indexConds != nil { @@ -501,7 +501,7 @@ func (ds *DataSource) forceToTableScan(pk *expression.Column) PhysicalPlan { } // convertToTableScan converts the DataSource to table scan. -func (ds *DataSource) convertToTableScan(prop *requiredProp, path *indexPath) (task task, err error) { +func (ds *DataSource) convertToTableScan(prop *requiredProp, path *accessPath) (task task, err error) { // It will be handled in convertToIndexScan. if prop.taskTp == copDoubleReadTaskType { return &copTask{cst: math.MaxFloat64}, nil diff --git a/plan/planbuilder.go b/plan/planbuilder.go index 6b6453005626c..35c7002dd4c33 100644 --- a/plan/planbuilder.go +++ b/plan/planbuilder.go @@ -317,7 +317,7 @@ func (b *planBuilder) detectSelectAgg(sel *ast.SelectStmt) bool { return false } -func matchPathByIndexName(paths []*indexPath, idxName model.CIStr) *indexPath { +func matchPathByIndexName(paths []*accessPath, idxName model.CIStr) *accessPath { for _, path := range paths { if path.index.Name.L == idxName.L { return path @@ -326,18 +326,18 @@ func matchPathByIndexName(paths []*indexPath, idxName model.CIStr) *indexPath { return nil } -func getPossibleIndexPaths(indexHints []*ast.IndexHint, tblInfo *model.TableInfo) ([]*indexPath, error) { - publicPaths := make([]*indexPath, 0, len(tblInfo.Indices)+1) - publicPaths = append(publicPaths, &indexPath{isRowID: true}) +func getPossibleAccessPaths(indexHints []*ast.IndexHint, tblInfo *model.TableInfo) ([]*accessPath, error) { + publicPaths := make([]*accessPath, 0, len(tblInfo.Indices)+1) + publicPaths = append(publicPaths, &accessPath{isRowID: true}) for _, index := range tblInfo.Indices { if index.State == model.StatePublic { - publicPaths = append(publicPaths, &indexPath{index: index}) + publicPaths = append(publicPaths, &accessPath{index: index}) } } hasScanHint, hasUseOrForce := false, false - available := make([]*indexPath, 0, len(publicPaths)) - ignored := make([]*indexPath, 0, len(publicPaths)) + available := make([]*accessPath, 0, len(publicPaths)) + ignored := make([]*accessPath, 0, len(publicPaths)) for _, hint := range indexHints { if hint.HintScope != ast.HintForScan { continue @@ -374,16 +374,16 @@ func getPossibleIndexPaths(indexHints []*ast.IndexHint, tblInfo *model.TableInfo // If we have got "FORCE" or "USE" index hint but got no available index, // we have to use table scan. if len(available) == 0 { - available = append(available, &indexPath{isRowID: true}) + available = append(available, &accessPath{isRowID: true}) } return available, nil } -func removeIgnoredPaths(paths, ignoredPaths []*indexPath) []*indexPath { +func removeIgnoredPaths(paths, ignoredPaths []*accessPath) []*accessPath { if len(ignoredPaths) == 0 { return paths } - remainedPaths := make([]*indexPath, 0, len(paths)) + remainedPaths := make([]*accessPath, 0, len(paths)) for _, path := range paths { if path.isRowID || matchPathByIndexName(ignoredPaths, path.index.Name) == nil { remainedPaths = append(remainedPaths, path) diff --git a/plan/property_cols_prune.go b/plan/property_cols_prune.go index d54534f2589e5..d6469ab2c54f3 100644 --- a/plan/property_cols_prune.go +++ b/plan/property_cols_prune.go @@ -18,9 +18,9 @@ import ( ) func (ds *DataSource) preparePossibleProperties() [][]*expression.Column { - result := make([][]*expression.Column, 0, len(ds.possibleIndexPaths)) + result := make([][]*expression.Column, 0, len(ds.possibleAccessPaths)) - for _, path := range ds.possibleIndexPaths { + for _, path := range ds.possibleAccessPaths { if path.isRowID { col := ds.getPKIsHandleCol() if col != nil { diff --git a/plan/stats.go b/plan/stats.go index 53fc332c16239..ca1b9b5323e70 100644 --- a/plan/stats.go +++ b/plan/stats.go @@ -19,8 +19,6 @@ import ( "github.com/juju/errors" "github.com/pingcap/tidb/expression" - "github.com/pingcap/tidb/mysql" - "github.com/pingcap/tidb/util/ranger" log "github.com/sirupsen/logrus" ) @@ -131,65 +129,18 @@ func (ds *DataSource) deriveStats() (*statsInfo, error) { ds.pushedDownConds[i] = expression.PushDownNot(nil, expr, false) } ds.statsAfterSelect = ds.getStatsByFilter(ds.pushedDownConds) - sc := ds.ctx.GetSessionVars().StmtCtx - for _, path := range ds.possibleIndexPaths { - path.countAfterAccess = float64(ds.statisticTable.Count) - path.countAfterIndex = float64(ds.statisticTable.Count) + for _, path := range ds.possibleAccessPaths { var err error if path.isRowID { - var pkCol *expression.Column - if ds.tableInfo.PKIsHandle { - if pkColInfo := ds.tableInfo.GetPkColInfo(); pkColInfo != nil { - pkCol = expression.ColInfo2Col(ds.schema.Columns, pkColInfo) - } - } - if pkCol != nil { - path.ranges = ranger.FullIntNewRange(mysql.HasUnsignedFlag(pkCol.RetType.Flag)) - } else { - path.ranges = ranger.FullIntNewRange(false) - } - if len(ds.pushedDownConds) > 0 { - if pkCol != nil { - path.accessConds, path.tableFilters = ranger.DetachCondsForTableRange(ds.ctx, ds.pushedDownConds, pkCol) - path.ranges, err = ranger.BuildTableRange(path.accessConds, sc, pkCol.RetType) - if err != nil { - return nil, errors.Trace(err) - } - path.countAfterAccess, err = ds.statisticTable.GetRowCountByIntColumnRanges(sc, pkCol.ID, path.ranges) - if err != nil { - return nil, errors.Trace(err) - } - } else { - path.tableFilters = ds.pushedDownConds - } + err = ds.prepareTablePath(path) + if err != nil { + return nil, errors.Trace(err) } continue } - path.ranges = ranger.FullNewRange() - if len(ds.pushedDownConds) > 0 { - idxCols, lengths := expression.IndexInfo2Cols(ds.schema.Columns, path.index) - if len(idxCols) != 0 { - path.ranges, path.accessConds, path.indexFilters, path.eqCondCount, err = ranger.DetachCondAndBuildRangeForIndex(ds.ctx, ds.pushedDownConds, idxCols, lengths) - if err != nil { - return nil, errors.Trace(err) - } - path.countAfterAccess, err = ds.statisticTable.GetRowCountByIndexRanges(sc, path.index.ID, path.ranges) - if err != nil { - return nil, errors.Trace(err) - } - path.indexFilters, path.tableFilters = splitIndexFilterConditions(path.indexFilters, path.index.Columns, ds.tableInfo) - } else { - path.indexFilters, path.tableFilters = splitIndexFilterConditions(ds.pushedDownConds, path.index.Columns, ds.tableInfo) - } - path.countAfterIndex = path.countAfterAccess - if path.indexFilters != nil { - selectivity, err := ds.statisticTable.Selectivity(ds.ctx, path.indexFilters) - if err != nil { - log.Warnf("An error happened: %v, we have to use the default selectivity", err.Error()) - selectivity = selectionFactor - } - path.countAfterIndex = math.Max(path.countAfterAccess*selectivity, ds.statsAfterSelect.count) - } + err = ds.prepareIndexPath(path) + if err != nil { + return nil, errors.Trace(err) } } return ds.statsAfterSelect, nil diff --git a/types/datum.go b/types/datum.go index ff99b1d64f673..549c3127cd21d 100644 --- a/types/datum.go +++ b/types/datum.go @@ -122,11 +122,6 @@ func (d *Datum) IsNull() bool { return d.k == KindNull } -// IsMaxValue checks if datum is max value. -func (d *Datum) IsMaxValue() bool { - return d.k == KindMaxValue -} - // GetInt64 gets int64 value. func (d *Datum) GetInt64() int64 { return d.i From ba77bdffd95774397863dc70ee0da47a7d6d7997 Mon Sep 17 00:00:00 2001 From: Yiding Cui Date: Wed, 25 Apr 2018 15:28:08 +0800 Subject: [PATCH 3/8] address comment. --- plan/gen_physical_plans.go | 8 ++++---- plan/logical_plans.go | 15 +++++++++------ plan/physical_plan_builder.go | 21 ++++++++++----------- plan/stats.go | 5 ++--- 4 files changed, 25 insertions(+), 24 deletions(-) diff --git a/plan/gen_physical_plans.go b/plan/gen_physical_plans.go index f39c949f5d60c..1f43cd888ac19 100644 --- a/plan/gen_physical_plans.go +++ b/plan/gen_physical_plans.go @@ -256,9 +256,9 @@ func (p *LogicalJoin) getIndexJoinByOuterIdx(prop *requiredProp, outerIdx int) [ if !ok { return nil } - indexPaths := x.possibleAccessPaths - if len(x.possibleAccessPaths) > 0 && x.possibleAccessPaths[0].isRowID { - indexPaths = indexPaths[1:] + accessPaths := x.possibleAccessPaths + if len(accessPaths) > 0 && x.possibleAccessPaths[0].isRowID { + accessPaths = accessPaths[1:] pkCol := x.getPKIsHandleCol() if pkCol != nil && innerJoinKeys[0].Equal(nil, pkCol) { innerPlan := x.forceToTableScan(pkCol) @@ -272,7 +272,7 @@ func (p *LogicalJoin) getIndexJoinByOuterIdx(prop *requiredProp, outerIdx int) [ remainedOfBest []expression.Expression keyOff2IdxOff []int ) - for _, path := range indexPaths { + for _, path := range accessPaths { indexInfo := path.index ranges, remained, tmpKeyOff2IdxOff := p.buildRangeForIndexJoin(indexInfo, x, innerJoinKeys) // We choose the index by the number of used columns of the range, the much the better. diff --git a/plan/logical_plans.go b/plan/logical_plans.go index 32728d514778d..6d9d01cd22a11 100644 --- a/plan/logical_plans.go +++ b/plan/logical_plans.go @@ -304,8 +304,8 @@ type DataSource struct { statisticTable *statistics.Table - // possibleAccessPaths stores all the possible index path for physical plan, including table scan. - // Please make sure table scan is always the first element. + // possibleAccessPaths stores all the possible access path for physical plan, including table scan. + // Please make sure table path is always the first element. possibleAccessPaths []*accessPath } @@ -327,17 +327,21 @@ type accessPath struct { forced bool } -func (ds *DataSource) prepareTablePath(path *accessPath) error { +func (ds *DataSource) deriveTablePathStats(path *accessPath) error { var err error sc := ds.ctx.GetSessionVars().StmtCtx path.countAfterAccess = float64(ds.statisticTable.Count) var pkCol *expression.Column + if ds.tableInfo.PKIsHandle { + if pkColInfo := ds.tableInfo.GetPkColInfo(); pkColInfo != nil { + pkCol = expression.ColInfo2Col(ds.schema.Columns, pkColInfo) + } + } if pkCol != nil { path.ranges = ranger.FullIntNewRange(mysql.HasUnsignedFlag(pkCol.RetType.Flag)) } else { path.ranges = ranger.FullIntNewRange(false) } - path.countAfterAccess = float64(ds.statisticTable.Count) if len(ds.pushedDownConds) > 0 { if pkCol != nil { path.accessConds, path.tableFilters = ranger.DetachCondsForTableRange(ds.ctx, ds.pushedDownConds, pkCol) @@ -356,12 +360,11 @@ func (ds *DataSource) prepareTablePath(path *accessPath) error { return nil } -func (ds *DataSource) prepareIndexPath(path *accessPath) error { +func (ds *DataSource) deriveIndexPathStats(path *accessPath) error { var err error sc := ds.ctx.GetSessionVars().StmtCtx path.ranges = ranger.FullNewRange() path.countAfterAccess = float64(ds.statisticTable.Count) - path.countAfterIndex = float64(ds.statisticTable.Count) idxCols, lengths := expression.IndexInfo2Cols(ds.schema.Columns, path.index) if len(idxCols) != 0 { path.ranges, path.accessConds, path.indexFilters, path.eqCondCount, err = ranger.DetachCondAndBuildRangeForIndex(ds.ctx, ds.pushedDownConds, idxCols, lengths) diff --git a/plan/physical_plan_builder.go b/plan/physical_plan_builder.go index ec3ab863877c7..b9133761ca815 100644 --- a/plan/physical_plan_builder.go +++ b/plan/physical_plan_builder.go @@ -212,15 +212,15 @@ func (ds *DataSource) findBestTask(prop *requiredProp) (task, error) { if tblTask.cost() < t.cost() { t = tblTask } - } else { - if len(path.accessConds) > 0 || len(prop.cols) > 0 || path.forced { - idxTask, err := ds.convertToIndexScan(prop, path) - if err != nil { - return nil, errors.Trace(err) - } - if idxTask.cost() < t.cost() { - t = idxTask - } + continue + } + if len(path.accessConds) > 0 || len(prop.cols) > 0 || path.forced { + idxTask, err := ds.convertToIndexScan(prop, path) + if err != nil { + return nil, errors.Trace(err) + } + if idxTask.cost() < t.cost() { + t = idxTask } } } @@ -309,11 +309,10 @@ func (ds *DataSource) convertToIndexScan(prop *requiredProp, path *accessPath) ( if statsTbl.Indices[idx.ID] != nil { is.Hist = &statsTbl.Indices[idx.ID].Histogram } - rowCount := float64(statsTbl.Count) is.Ranges = ranger.FullNewRange() eqCount := 0 is.AccessCondition, is.Ranges, is.filterCondition, eqCount = path.accessConds, path.ranges, path.indexFilters, path.eqCondCount - rowCount = path.countAfterAccess + rowCount := path.countAfterAccess cop := &copTask{indexPlan: is} if !isCoveringIndex(is.Columns, is.Index.Columns, is.Table.PKIsHandle) { // On this way, it's double read case. diff --git a/plan/stats.go b/plan/stats.go index ca1b9b5323e70..ced64e14f88c5 100644 --- a/plan/stats.go +++ b/plan/stats.go @@ -130,15 +130,14 @@ func (ds *DataSource) deriveStats() (*statsInfo, error) { } ds.statsAfterSelect = ds.getStatsByFilter(ds.pushedDownConds) for _, path := range ds.possibleAccessPaths { - var err error if path.isRowID { - err = ds.prepareTablePath(path) + err := ds.deriveTablePathStats(path) if err != nil { return nil, errors.Trace(err) } continue } - err = ds.prepareIndexPath(path) + err := ds.deriveIndexPathStats(path) if err != nil { return nil, errors.Trace(err) } From 8192bfa9116a2c8e05924c1b5a1b46fa1b702a4c Mon Sep 17 00:00:00 2001 From: Yiding Cui Date: Wed, 25 Apr 2018 19:45:18 +0800 Subject: [PATCH 4/8] address comments. --- plan/logical_plans.go | 32 +++++++++++++++----------------- 1 file changed, 15 insertions(+), 17 deletions(-) diff --git a/plan/logical_plans.go b/plan/logical_plans.go index f4ec8b1a1139d..12b7514ba15aa 100644 --- a/plan/logical_plans.go +++ b/plan/logical_plans.go @@ -331,31 +331,29 @@ func (ds *DataSource) deriveTablePathStats(path *accessPath) error { var err error sc := ds.ctx.GetSessionVars().StmtCtx path.countAfterAccess = float64(ds.statisticTable.Count) + path.tableFilters = ds.pushedDownConds var pkCol *expression.Column if ds.tableInfo.PKIsHandle { if pkColInfo := ds.tableInfo.GetPkColInfo(); pkColInfo != nil { pkCol = expression.ColInfo2Col(ds.schema.Columns, pkColInfo) } } - if pkCol != nil { - path.ranges = ranger.FullIntRange(mysql.HasUnsignedFlag(pkCol.RetType.Flag)) - } else { + if pkCol == nil { path.ranges = ranger.FullIntRange(false) + return nil } - if len(ds.pushedDownConds) > 0 { - if pkCol != nil { - path.accessConds, path.tableFilters = ranger.DetachCondsForTableRange(ds.ctx, ds.pushedDownConds, pkCol) - path.ranges, err = ranger.BuildTableRange(path.accessConds, sc, pkCol.RetType) - if err != nil { - return errors.Trace(err) - } - path.countAfterAccess, err = ds.statisticTable.GetRowCountByIntColumnRanges(sc, pkCol.ID, path.ranges) - if err != nil { - return errors.Trace(err) - } - } else { - path.tableFilters = ds.pushedDownConds - } + path.ranges = ranger.FullIntRange(mysql.HasUnsignedFlag(pkCol.RetType.Flag)) + if len(ds.pushedDownConds) == 0 { + return nil + } + path.accessConds, path.tableFilters = ranger.DetachCondsForTableRange(ds.ctx, ds.pushedDownConds, pkCol) + path.ranges, err = ranger.BuildTableRange(path.accessConds, sc, pkCol.RetType) + if err != nil { + return errors.Trace(err) + } + path.countAfterAccess, err = ds.statisticTable.GetRowCountByIntColumnRanges(sc, pkCol.ID, path.ranges) + if err != nil { + return errors.Trace(err) } return nil } From 5d3620f49152265eb304288482f9fddc3202379a Mon Sep 17 00:00:00 2001 From: Yiding Cui Date: Thu, 26 Apr 2018 13:14:57 +0800 Subject: [PATCH 5/8] address comment. --- plan/gen_physical_plans.go | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/plan/gen_physical_plans.go b/plan/gen_physical_plans.go index 862fa21dd636f..3c6ff9929bac6 100644 --- a/plan/gen_physical_plans.go +++ b/plan/gen_physical_plans.go @@ -259,10 +259,12 @@ func (p *LogicalJoin) getIndexJoinByOuterIdx(prop *requiredProp, outerIdx int) [ accessPaths := x.possibleAccessPaths if len(accessPaths) > 0 && x.possibleAccessPaths[0].isRowID { accessPaths = accessPaths[1:] - pkCol := x.getPKIsHandleCol() - if pkCol != nil && innerJoinKeys[0].Equal(nil, pkCol) { - innerPlan := x.forceToTableScan(pkCol) - return p.constructIndexJoin(prop, innerJoinKeys, outerJoinKeys, outerIdx, innerPlan, nil, nil) + if len(innerJoinKeys) == 1 { + pkCol := x.getPKIsHandleCol() + if pkCol != nil && innerJoinKeys[0].Equal(nil, pkCol) { + innerPlan := x.forceToTableScan(pkCol) + return p.constructIndexJoin(prop, innerJoinKeys, outerJoinKeys, outerIdx, innerPlan, nil, nil) + } } } var ( From d170ac34347ae659457448ee824e6d668834e3a7 Mon Sep 17 00:00:00 2001 From: Yiding Cui Date: Sat, 28 Apr 2018 13:10:58 +0800 Subject: [PATCH 6/8] address comments. --- plan/build_key_info.go | 2 +- plan/gen_physical_plans.go | 2 +- plan/logical_plans.go | 13 +++++-------- plan/physical_plan_builder.go | 6 +++++- plan/planbuilder.go | 17 +++++++---------- plan/property_cols_prune.go | 2 +- plan/stats.go | 7 ++----- 7 files changed, 22 insertions(+), 27 deletions(-) diff --git a/plan/build_key_info.go b/plan/build_key_info.go index a67c6f8c5c025..24cf42072bc7a 100644 --- a/plan/build_key_info.go +++ b/plan/build_key_info.go @@ -175,7 +175,7 @@ func (ds *DataSource) buildKeyInfo() { ds.schema.Keys = nil ds.baseLogicalPlan.buildKeyInfo() for _, path := range ds.possibleAccessPaths { - if path.isRowID { + if path.isTablePath { continue } idx := path.index diff --git a/plan/gen_physical_plans.go b/plan/gen_physical_plans.go index 3c6ff9929bac6..4fdbedf523499 100644 --- a/plan/gen_physical_plans.go +++ b/plan/gen_physical_plans.go @@ -257,7 +257,7 @@ func (p *LogicalJoin) getIndexJoinByOuterIdx(prop *requiredProp, outerIdx int) [ return nil } accessPaths := x.possibleAccessPaths - if len(accessPaths) > 0 && x.possibleAccessPaths[0].isRowID { + if len(accessPaths) > 0 && accessPaths[0].isTablePath { accessPaths = accessPaths[1:] if len(innerJoinKeys) == 1 { pkCol := x.getPKIsHandleCol() diff --git a/plan/logical_plans.go b/plan/logical_plans.go index 12b7514ba15aa..91e0509ca03ca 100644 --- a/plan/logical_plans.go +++ b/plan/logical_plans.go @@ -305,7 +305,7 @@ type DataSource struct { statisticTable *statistics.Table // possibleAccessPaths stores all the possible access path for physical plan, including table scan. - // Please make sure table path is always the first element. + // Please make sure table path is always the first element if we have table path. possibleAccessPaths []*accessPath } @@ -321,9 +321,9 @@ type accessPath struct { eqCondCount int indexFilters []expression.Expression tableFilters []expression.Expression - // isRowID indicates this path stores the information for table scan. - isRowID bool - // forced means this index is generated by `use/force index()`. + // isTablePath indicates whether this path is table path. + isTablePath bool + // forced means this path is generated by `use/force index()`. forced bool } @@ -352,10 +352,7 @@ func (ds *DataSource) deriveTablePathStats(path *accessPath) error { return errors.Trace(err) } path.countAfterAccess, err = ds.statisticTable.GetRowCountByIntColumnRanges(sc, pkCol.ID, path.ranges) - if err != nil { - return errors.Trace(err) - } - return nil + return errors.Trace(err) } func (ds *DataSource) deriveIndexPathStats(path *accessPath) error { diff --git a/plan/physical_plan_builder.go b/plan/physical_plan_builder.go index e8e48c17974a6..ed4a818d00e34 100644 --- a/plan/physical_plan_builder.go +++ b/plan/physical_plan_builder.go @@ -203,7 +203,7 @@ func (ds *DataSource) findBestTask(prop *requiredProp) (task, error) { t = invalidTask for _, path := range ds.possibleAccessPaths { - if path.isRowID { + if path.isTablePath { tblTask, err := ds.convertToTableScan(prop, path) if err != nil { return nil, errors.Trace(err) @@ -213,6 +213,10 @@ func (ds *DataSource) findBestTask(prop *requiredProp) (task, error) { } continue } + // We will use index to generate physical plan if: + // this path's access cond is not nil or + // we have prop to match or + // this index is force to choose. if len(path.accessConds) > 0 || len(prop.cols) > 0 || path.forced { idxTask, err := ds.convertToIndexScan(prop, path) if err != nil { diff --git a/plan/planbuilder.go b/plan/planbuilder.go index 6ed92cda53a91..52fd021569084 100644 --- a/plan/planbuilder.go +++ b/plan/planbuilder.go @@ -317,7 +317,7 @@ func (b *planBuilder) detectSelectAgg(sel *ast.SelectStmt) bool { return false } -func matchPathByIndexName(paths []*accessPath, idxName model.CIStr) *accessPath { +func getPathByIndexName(paths []*accessPath, idxName model.CIStr) *accessPath { for _, path := range paths { if path.index.Name.L == idxName.L { return path @@ -328,7 +328,7 @@ func matchPathByIndexName(paths []*accessPath, idxName model.CIStr) *accessPath func getPossibleAccessPaths(indexHints []*ast.IndexHint, tblInfo *model.TableInfo) ([]*accessPath, error) { publicPaths := make([]*accessPath, 0, len(tblInfo.Indices)+1) - publicPaths = append(publicPaths, &accessPath{isRowID: true}) + publicPaths = append(publicPaths, &accessPath{isTablePath: true}) for _, index := range tblInfo.Indices { if index.State == model.StatePublic { publicPaths = append(publicPaths, &accessPath{index: index}) @@ -345,7 +345,7 @@ func getPossibleAccessPaths(indexHints []*ast.IndexHint, tblInfo *model.TableInf hasScanHint = true for _, idxName := range hint.IndexNames { - path := matchPathByIndexName(publicPaths[1:], idxName) + path := getPathByIndexName(publicPaths[1:], idxName) if path == nil { return nil, ErrKeyDoesNotExist.GenByArgs(idxName, tblInfo.Name) } @@ -362,11 +362,8 @@ func getPossibleAccessPaths(indexHints []*ast.IndexHint, tblInfo *model.TableInf } } - if !hasScanHint { - return publicPaths, nil - } - if !hasUseOrForce { - return removeIgnoredPaths(publicPaths, ignored), nil + if !hasScanHint || !hasUseOrForce { + available = publicPaths } available = removeIgnoredPaths(available, ignored) @@ -374,7 +371,7 @@ func getPossibleAccessPaths(indexHints []*ast.IndexHint, tblInfo *model.TableInf // If we have got "FORCE" or "USE" index hint but got no available index, // we have to use table scan. if len(available) == 0 { - available = append(available, &accessPath{isRowID: true}) + available = append(available, &accessPath{isTablePath: true}) } return available, nil } @@ -385,7 +382,7 @@ func removeIgnoredPaths(paths, ignoredPaths []*accessPath) []*accessPath { } remainedPaths := make([]*accessPath, 0, len(paths)) for _, path := range paths { - if path.isRowID || matchPathByIndexName(ignoredPaths, path.index.Name) == nil { + if path.isTablePath || getPathByIndexName(ignoredPaths, path.index.Name) == nil { remainedPaths = append(remainedPaths, path) } } diff --git a/plan/property_cols_prune.go b/plan/property_cols_prune.go index d6469ab2c54f3..e85750b879926 100644 --- a/plan/property_cols_prune.go +++ b/plan/property_cols_prune.go @@ -21,7 +21,7 @@ func (ds *DataSource) preparePossibleProperties() [][]*expression.Column { result := make([][]*expression.Column, 0, len(ds.possibleAccessPaths)) for _, path := range ds.possibleAccessPaths { - if path.isRowID { + if path.isTablePath { col := ds.getPKIsHandleCol() if col != nil { result = append(result, []*expression.Column{col}) diff --git a/plan/stats.go b/plan/stats.go index ced64e14f88c5..c10b133f94f91 100644 --- a/plan/stats.go +++ b/plan/stats.go @@ -83,10 +83,7 @@ func (p *baseLogicalPlan) deriveStats() (*statsInfo, error) { if len(p.children) == 1 { var err error p.stats, err = p.children[0].deriveStats() - if err != nil { - return nil, errors.Trace(err) - } - return p.stats, nil + return p.stats, errors.Trace(err) } profile := &statsInfo{ @@ -130,7 +127,7 @@ func (ds *DataSource) deriveStats() (*statsInfo, error) { } ds.statsAfterSelect = ds.getStatsByFilter(ds.pushedDownConds) for _, path := range ds.possibleAccessPaths { - if path.isRowID { + if path.isTablePath { err := ds.deriveTablePathStats(path) if err != nil { return nil, errors.Trace(err) From e528c54492bdbf778de0f2aae58e36d26cbac348 Mon Sep 17 00:00:00 2001 From: Yiding Cui Date: Sat, 28 Apr 2018 14:03:17 +0800 Subject: [PATCH 7/8] address comment. --- plan/gen_physical_plans.go | 11 +++++++++-- plan/logical_plans.go | 1 - 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/plan/gen_physical_plans.go b/plan/gen_physical_plans.go index 4fdbedf523499..cb0b3e2f08b75 100644 --- a/plan/gen_physical_plans.go +++ b/plan/gen_physical_plans.go @@ -257,8 +257,15 @@ func (p *LogicalJoin) getIndexJoinByOuterIdx(prop *requiredProp, outerIdx int) [ return nil } accessPaths := x.possibleAccessPaths - if len(accessPaths) > 0 && accessPaths[0].isTablePath { - accessPaths = accessPaths[1:] + var tblPath *accessPath + for i, path := range accessPaths { + if path.isTablePath { + tblPath = path + accessPaths = append(accessPaths[:i], accessPaths[i+1:]...) + break + } + } + if tblPath != nil { if len(innerJoinKeys) == 1 { pkCol := x.getPKIsHandleCol() if pkCol != nil && innerJoinKeys[0].Equal(nil, pkCol) { diff --git a/plan/logical_plans.go b/plan/logical_plans.go index 91e0509ca03ca..6019ef69b801c 100644 --- a/plan/logical_plans.go +++ b/plan/logical_plans.go @@ -305,7 +305,6 @@ type DataSource struct { statisticTable *statistics.Table // possibleAccessPaths stores all the possible access path for physical plan, including table scan. - // Please make sure table path is always the first element if we have table path. possibleAccessPaths []*accessPath } From 8d5775bfa805fedd476ef102fb889bd3b7d62ea7 Mon Sep 17 00:00:00 2001 From: Yiding Cui Date: Sat, 28 Apr 2018 14:33:50 +0800 Subject: [PATCH 8/8] address comments. --- plan/gen_physical_plans.go | 21 ++++++++++----------- plan/physical_plan_builder.go | 2 +- plan/property_cols_prune.go | 10 +++++----- 3 files changed, 16 insertions(+), 17 deletions(-) diff --git a/plan/gen_physical_plans.go b/plan/gen_physical_plans.go index cb0b3e2f08b75..9dfefa8ccfec7 100644 --- a/plan/gen_physical_plans.go +++ b/plan/gen_physical_plans.go @@ -256,22 +256,18 @@ func (p *LogicalJoin) getIndexJoinByOuterIdx(prop *requiredProp, outerIdx int) [ if !ok { return nil } - accessPaths := x.possibleAccessPaths var tblPath *accessPath - for i, path := range accessPaths { + for _, path := range x.possibleAccessPaths { if path.isTablePath { tblPath = path - accessPaths = append(accessPaths[:i], accessPaths[i+1:]...) break } } - if tblPath != nil { - if len(innerJoinKeys) == 1 { - pkCol := x.getPKIsHandleCol() - if pkCol != nil && innerJoinKeys[0].Equal(nil, pkCol) { - innerPlan := x.forceToTableScan(pkCol) - return p.constructIndexJoin(prop, innerJoinKeys, outerJoinKeys, outerIdx, innerPlan, nil, nil) - } + if tblPath != nil && len(innerJoinKeys) == 1 { + pkCol := x.getPKIsHandleCol() + if pkCol != nil && innerJoinKeys[0].Equal(nil, pkCol) { + innerPlan := x.forceToTableScan(pkCol) + return p.constructIndexJoin(prop, innerJoinKeys, outerJoinKeys, outerIdx, innerPlan, nil, nil) } } var ( @@ -281,7 +277,10 @@ func (p *LogicalJoin) getIndexJoinByOuterIdx(prop *requiredProp, outerIdx int) [ remainedOfBest []expression.Expression keyOff2IdxOff []int ) - for _, path := range accessPaths { + for _, path := range x.possibleAccessPaths { + if path.isTablePath { + continue + } indexInfo := path.index ranges, remained, tmpKeyOff2IdxOff := p.buildRangeForIndexJoin(indexInfo, x, innerJoinKeys) // We choose the index by the number of used columns of the range, the much the better. diff --git a/plan/physical_plan_builder.go b/plan/physical_plan_builder.go index ed4a818d00e34..4478924094d88 100644 --- a/plan/physical_plan_builder.go +++ b/plan/physical_plan_builder.go @@ -216,7 +216,7 @@ func (ds *DataSource) findBestTask(prop *requiredProp) (task, error) { // We will use index to generate physical plan if: // this path's access cond is not nil or // we have prop to match or - // this index is force to choose. + // this index is forced to choose. if len(path.accessConds) > 0 || len(prop.cols) > 0 || path.forced { idxTask, err := ds.convertToIndexScan(prop, path) if err != nil { diff --git a/plan/property_cols_prune.go b/plan/property_cols_prune.go index e85750b879926..1333c9610f255 100644 --- a/plan/property_cols_prune.go +++ b/plan/property_cols_prune.go @@ -26,11 +26,11 @@ func (ds *DataSource) preparePossibleProperties() [][]*expression.Column { if col != nil { result = append(result, []*expression.Column{col}) } - } else { - cols, _ := expression.IndexInfo2Cols(ds.schema.Columns, path.index) - if len(cols) > 0 { - result = append(result, cols) - } + continue + } + cols, _ := expression.IndexInfo2Cols(ds.schema.Columns, path.index) + if len(cols) > 0 { + result = append(result, cols) } } return result