From c77d0618b8cbea40e65c15141747f5c30f53ad90 Mon Sep 17 00:00:00 2001 From: xuyifan <675434007@qq.com> Date: Wed, 19 Oct 2022 22:32:10 +0800 Subject: [PATCH 01/21] avoid double scan for index prefix col is (not) null --- planner/core/exhaust_physical_plans.go | 2 +- planner/core/find_best_task.go | 87 +++++++++++++----- planner/core/integration_test.go | 25 ++++++ planner/core/logical_plans.go | 8 +- planner/core/rule_column_pruning.go | 8 ++ planner/core/stats.go | 2 +- util/ranger/checker.go | 118 +++++++++++++++---------- util/ranger/detacher.go | 49 +++++----- 8 files changed, 206 insertions(+), 93 deletions(-) diff --git a/planner/core/exhaust_physical_plans.go b/planner/core/exhaust_physical_plans.go index e726996268831..71f60065ab566 100644 --- a/planner/core/exhaust_physical_plans.go +++ b/planner/core/exhaust_physical_plans.go @@ -1148,7 +1148,7 @@ func (p *LogicalJoin) constructInnerIndexScanTask( cop.commonHandleCols = ds.commonHandleCols } is.initSchema(append(path.FullIdxCols, ds.commonHandleCols...), cop.tablePlan != nil) - indexConds, tblConds := ds.splitIndexFilterConditions(filterConds, path.FullIdxCols, path.FullIdxColLens, ds.tableInfo) + indexConds, tblConds := ds.splitIndexFilterConditions(filterConds, path.FullIdxCols, path.FullIdxColLens) if maxOneRow { // Theoretically, this line is unnecessary because row count estimation of join should guarantee rowCount is not larger // than 1.0; however, there may be rowCount larger than 1.0 in reality, e.g, pseudo statistics cases, which does not reflect diff --git a/planner/core/find_best_task.go b/planner/core/find_best_task.go index 2389f56da5337..a74b14e412dd0 100644 --- a/planner/core/find_best_task.go +++ b/planner/core/find_best_task.go @@ -1118,7 +1118,7 @@ func (ds *DataSource) convertToIndexMergeScan(prop *property.PhysicalProperty, c func (ds *DataSource) convertToPartialIndexScan(prop *property.PhysicalProperty, path *util.AccessPath) (indexPlan PhysicalPlan) { is := ds.getOriginalPhysicalIndexScan(prop, path, false, false) - // TODO: Consider using isCoveringIndex() to avoid another TableRead + // TODO: Consider using indexCoveringColumns() to avoid another TableRead indexConds := path.IndexFilters if indexConds != nil { var selectivity float64 @@ -1281,33 +1281,76 @@ func extractFiltersForIndexMerge(sc *stmtctx.StatementContext, client kv.Client, return } -func indexCoveringCol(col *expression.Column, indexCols []*expression.Column, idxColLens []int) bool { +func indexColsCoveringCol(col *expression.Column, indexCols []*expression.Column, idxColLens []int, ignoreLen bool) bool { for i, indexCol := range indexCols { - isFullLen := idxColLens[i] == types.UnspecifiedLength || idxColLens[i] == col.RetType.GetFlen() - if indexCol != nil && col.EqualByExprAndID(nil, indexCol) && isFullLen { - return true + if indexCol != nil && col.EqualByExprAndID(nil, indexCol) { + if ignoreLen || idxColLens[i] == types.UnspecifiedLength || idxColLens[i] == col.RetType.GetFlen() { + return true + } } } return false } -func (ds *DataSource) isCoveringIndex(columns, indexColumns []*expression.Column, idxColLens []int, tblInfo *model.TableInfo) bool { +func (ds *DataSource) indexCoveringColumn(column *expression.Column, indexColumns []*expression.Column, idxColLens []int, ignoreLen bool) bool { + if ds.tableInfo.PKIsHandle && mysql.HasPriKeyFlag(column.RetType.GetFlag()) { + return true + } + if column.ID == model.ExtraHandleID { + return true + } + coveredByPlainIndex := indexColsCoveringCol(column, indexColumns, idxColLens, ignoreLen) + coveredByClusteredIndex := indexColsCoveringCol(column, ds.commonHandleCols, ds.commonHandleLens, ignoreLen) + if !coveredByPlainIndex && !coveredByClusteredIndex { + return false + } + isClusteredNewCollationIdx := collate.NewCollationEnabled() && + column.GetType().EvalType() == types.ETString && + !mysql.HasBinaryFlag(column.GetType().GetFlag()) + if !coveredByPlainIndex && coveredByClusteredIndex && isClusteredNewCollationIdx && ds.table.Meta().CommonHandleVersion == 0 { + return false + } + return true +} + +func (ds *DataSource) indexCoveringColumns(columns, indexColumns []*expression.Column, idxColLens []int) bool { for _, col := range columns { - if tblInfo.PKIsHandle && mysql.HasPriKeyFlag(col.RetType.GetFlag()) { - continue - } - if col.ID == model.ExtraHandleID { - continue - } - coveredByPlainIndex := indexCoveringCol(col, indexColumns, idxColLens) - coveredByClusteredIndex := indexCoveringCol(col, ds.commonHandleCols, ds.commonHandleLens) - if !coveredByPlainIndex && !coveredByClusteredIndex { + if !ds.indexCoveringColumn(col, indexColumns, idxColLens, false) { return false } - isClusteredNewCollationIdx := collate.NewCollationEnabled() && - col.GetType().EvalType() == types.ETString && - !mysql.HasBinaryFlag(col.GetType().GetFlag()) - if !coveredByPlainIndex && coveredByClusteredIndex && isClusteredNewCollationIdx && ds.table.Meta().CommonHandleVersion == 0 { + } + return true +} + +func (ds *DataSource) indexCoveringCondition(condition expression.Expression, indexColumns []*expression.Column, idxColLens []int) bool { + switch v := condition.(type) { + case *expression.Column: + return ds.indexCoveringColumn(v, indexColumns, idxColLens, false) + case *expression.ScalarFunction: + switch v.FuncName.L { + case ast.LogicOr, ast.LogicAnd: + lhsCovered := ds.indexCoveringCondition(v.GetArgs()[0], indexColumns, idxColLens) + rhsCovered := ds.indexCoveringCondition(v.GetArgs()[1], indexColumns, idxColLens) + return lhsCovered && rhsCovered + case ast.UnaryNot: + return ds.indexCoveringCondition(v.GetArgs()[0], indexColumns, idxColLens) + case ast.IsNull: + col, ok := v.GetArgs()[0].(*expression.Column) + if !ok { + return false + } + return ds.indexCoveringColumn(col, indexColumns, idxColLens, true) + } + } + return true +} + +func (ds *DataSource) isSingleScan(indexColumns []*expression.Column, idxColLens []int) bool { + if !ds.indexCoveringColumns(ds.colsRequiringFullLen, indexColumns, idxColLens) { + return false + } + for _, cond := range ds.allConds { + if !ds.indexCoveringCondition(cond, indexColumns, idxColLens) { return false } } @@ -1575,11 +1618,11 @@ func matchIndicesProp(idxCols []*expression.Column, colLens []int, propItems []p return true } -func (ds *DataSource) splitIndexFilterConditions(conditions []expression.Expression, indexColumns []*expression.Column, idxColLens []int, - table *model.TableInfo) (indexConds, tableConds []expression.Expression) { +func (ds *DataSource) splitIndexFilterConditions(conditions []expression.Expression, indexColumns []*expression.Column, + idxColLens []int) (indexConds, tableConds []expression.Expression) { var indexConditions, tableConditions []expression.Expression for _, cond := range conditions { - if ds.isCoveringIndex(expression.ExtractColumns(cond), indexColumns, idxColLens, table) { + if ds.indexCoveringCondition(cond, indexColumns, idxColLens) { indexConditions = append(indexConditions, cond) } else { tableConditions = append(tableConditions, cond) diff --git a/planner/core/integration_test.go b/planner/core/integration_test.go index 911bedace451e..0a06fe30058a9 100644 --- a/planner/core/integration_test.go +++ b/planner/core/integration_test.go @@ -7693,3 +7693,28 @@ func TestOuterJoinEliminationForIssue18216(t *testing.T) { tk.MustExec("select group_concat(c order by (select group_concat(c order by a) from t2 where a=t1.a)) from t1; ") tk.MustQuery("select group_concat(c order by (select group_concat(c order by c) from t2 where a=t1.a), c desc) from t1;").Check(testkit.Rows("2,1,4,3")) } + +func TestAvoidDoubleScanForPrefixIndex(t *testing.T) { + store := testkit.CreateMockStore(t) + tk := testkit.NewTestKit(t, store) + tk.MustExec("use test") + tk.MustExec(`CREATE TABLE t1 ( + id char(1) DEFAULT NULL, + c1 varchar(255) DEFAULT NULL, + c2 text DEFAULT NULL, + KEY idx1 (c1), + KEY idx2 (c1,c2(5)) +) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_bin`) + tk.MustExec("insert into t1 values ('a', '0xfff', '111111'), ('b', '0xfff', '222222'), ('c', '0xfff', null)") + tk.MustQuery("explain format='brief' select count(1) from t1 where c1 = '0xfff' and c2 is not null").Check(testkit.Rows( + "StreamAgg 1.00 root funcs:count(Column#7)->Column#5", + "└─IndexReader 1.00 root index:StreamAgg", + " └─StreamAgg 1.00 cop[tikv] funcs:count(1)->Column#7", + " └─IndexRangeScan 99.90 cop[tikv] table:t1, index:idx2(c1, c2) range:[\"0xfff\" -inf,\"0xfff\" +inf], keep order:false, stats:pseudo")) + tk.MustQuery("select count(1) from t1 where c1 = '0xfff' and c2 is not null").Check(testkit.Rows("2")) + tk.MustQuery("explain format='brief' select count(1) from t1 where c1 = '0xfff' and c2 is null").Check(testkit.Rows( + "StreamAgg 1.00 root funcs:count(1)->Column#5", + "└─IndexReader 0.10 root index:IndexRangeScan", + " └─IndexRangeScan 0.10 cop[tikv] table:t1, index:idx2(c1, c2) range:[\"0xfff\" NULL,\"0xfff\" NULL], keep order:false, stats:pseudo")) + tk.MustQuery("select count(1) from t1 where c1 = '0xfff' and c2 is null").Check(testkit.Rows("1")) +} diff --git a/planner/core/logical_plans.go b/planner/core/logical_plans.go index e3c24d2375134..acfeaec9f138a 100644 --- a/planner/core/logical_plans.go +++ b/planner/core/logical_plans.go @@ -1217,6 +1217,10 @@ type DataSource struct { // contain unique index and the first field is tidb_shard(), // such as (tidb_shard(a), a ...), the fields are more than 2 containExprPrefixUk bool + + // colsRequiringFullLen is the columns that must be fetched with full length. + // It is used to decide whether single scan is enough when reading from an index. + colsRequiringFullLen []*expression.Column } // ExtractCorrelatedCols implements LogicalPlan interface. @@ -1340,7 +1344,7 @@ func (ds *DataSource) Convert2Gathers() (gathers []LogicalPlan) { path.FullIdxCols, path.FullIdxColLens = expression.IndexInfo2Cols(ds.Columns, ds.schema.Columns, path.Index) path.IdxCols, path.IdxColLens = expression.IndexInfo2PrefixCols(ds.Columns, ds.schema.Columns, path.Index) // If index columns can cover all of the needed columns, we can use a IndexGather + IndexScan. - if ds.isCoveringIndex(ds.schema.Columns, path.FullIdxCols, path.FullIdxColLens, ds.tableInfo) { + if ds.indexCoveringColumns(ds.schema.Columns, path.FullIdxCols, path.FullIdxColLens) { gathers = append(gathers, ds.buildIndexGather(path)) } // TODO: If index columns can not cover the schema, use IndexLookUpGather. @@ -1545,7 +1549,7 @@ func (ds *DataSource) deriveIndexPathStats(path *util.AccessPath, _ []expression } } var indexFilters []expression.Expression - indexFilters, path.TableFilters = ds.splitIndexFilterConditions(path.TableFilters, path.FullIdxCols, path.FullIdxColLens, ds.tableInfo) + indexFilters, path.TableFilters = ds.splitIndexFilterConditions(path.TableFilters, path.FullIdxCols, path.FullIdxColLens) path.IndexFilters = append(path.IndexFilters, indexFilters...) // If the `CountAfterAccess` is less than `stats.RowCount`, there must be some inconsistent stats info. // We prefer the `stats.RowCount` because it could use more stats info to calculate the selectivity. diff --git a/planner/core/rule_column_pruning.go b/planner/core/rule_column_pruning.go index f42d385acc161..5eac4a88e88bd 100644 --- a/planner/core/rule_column_pruning.go +++ b/planner/core/rule_column_pruning.go @@ -312,6 +312,14 @@ func (ds *DataSource) PruneColumns(parentUsedCols []*expression.Column, opt *log originSchemaColumns := ds.schema.Columns originColumns := ds.Columns + + ds.colsRequiringFullLen = make([]*expression.Column, 0, len(used)) + for i, col := range ds.schema.Columns { + if used[i] || (ds.containExprPrefixUk && expression.GcColumnExprIsTidbShard(col.VirtualExpr)) { + ds.colsRequiringFullLen = append(ds.colsRequiringFullLen, col) + } + } + for i := len(used) - 1; i >= 0; i-- { if !used[i] && !exprUsed[i] { // If ds has a shard index, and the column is generated column by `tidb_shard()` diff --git a/planner/core/stats.go b/planner/core/stats.go index 4ece8209397cd..6fe03f87d6a80 100644 --- a/planner/core/stats.go +++ b/planner/core/stats.go @@ -277,7 +277,7 @@ func (ds *DataSource) derivePathStatsAndTryHeuristics() error { path.IsSingleScan = true } else { ds.deriveIndexPathStats(path, ds.pushedDownConds, false) - path.IsSingleScan = ds.isCoveringIndex(ds.schema.Columns, path.FullIdxCols, path.FullIdxColLens, ds.tableInfo) + path.IsSingleScan = ds.isSingleScan(path.FullIdxCols, path.FullIdxColLens) } // Try some heuristic rules to select access path. if len(path.Ranges) == 0 { diff --git a/util/ranger/checker.go b/util/ranger/checker.go index 48f6e3a389a13..7d43497bd5109 100644 --- a/util/ranger/checker.go +++ b/util/ranger/checker.go @@ -24,93 +24,122 @@ import ( // conditionChecker checks if this condition can be pushed to index planner. type conditionChecker struct { - checkerCol *expression.Column - colUniqueID int64 - length int - shouldReserve bool // check if a access condition should be reserved in filter conditions. + checkerCol *expression.Column + length int } -func (c *conditionChecker) check(condition expression.Expression) bool { +func (c *conditionChecker) isFullLengthColumn() bool { + return c.length == types.UnspecifiedLength || c.length == c.checkerCol.GetType().GetFlen() +} + +// check returns two values, isAccessCond and shouldReserve. +// isAccessCond indicates whether the condition can be used to build ranges. +// shouldReserve indicates whether the condition should be reserved in filter conditions. +func (c *conditionChecker) check(condition expression.Expression) (isAccessCond, shouldReserve bool) { switch x := condition.(type) { case *expression.ScalarFunction: return c.checkScalarFunction(x) case *expression.Column: if x.RetType.EvalType() == types.ETString { - return false + return false, true } - return c.checkColumn(x) + if c.checkColumn(x) { + return true, !c.isFullLengthColumn() + } + return false, true case *expression.Constant: - return true + return true, false } - return false + return false, true } -func (c *conditionChecker) checkScalarFunction(scalar *expression.ScalarFunction) bool { +func (c *conditionChecker) checkScalarFunction(scalar *expression.ScalarFunction) (isAccessCond, shouldReserve bool) { _, collation := scalar.CharsetAndCollation() switch scalar.FuncName.L { case ast.LogicOr, ast.LogicAnd: - return c.check(scalar.GetArgs()[0]) && c.check(scalar.GetArgs()[1]) + isAccessCond0, shouldReserve0 := c.check(scalar.GetArgs()[0]) + isAccessCond1, shouldReserve1 := c.check(scalar.GetArgs()[1]) + if isAccessCond0 && isAccessCond1 { + return true, shouldReserve0 || shouldReserve1 + } + return false, true case ast.EQ, ast.NE, ast.GE, ast.GT, ast.LE, ast.LT, ast.NullEQ: if _, ok := scalar.GetArgs()[0].(*expression.Constant); ok { if c.checkColumn(scalar.GetArgs()[1]) { // Checks whether the scalar function is calculated use the collation compatible with the column. if scalar.GetArgs()[1].GetType().EvalType() == types.ETString && !collate.CompatibleCollate(scalar.GetArgs()[1].GetType().GetCollate(), collation) { - return false + return false, true + } + isFullLength := c.isFullLengthColumn() + if scalar.FuncName.L == ast.NE { + return isFullLength, !isFullLength } - return scalar.FuncName.L != ast.NE || c.length == types.UnspecifiedLength + return true, !isFullLength } } if _, ok := scalar.GetArgs()[1].(*expression.Constant); ok { if c.checkColumn(scalar.GetArgs()[0]) { // Checks whether the scalar function is calculated use the collation compatible with the column. if scalar.GetArgs()[0].GetType().EvalType() == types.ETString && !collate.CompatibleCollate(scalar.GetArgs()[0].GetType().GetCollate(), collation) { - return false + return false, true + } + isFullLength := c.isFullLengthColumn() + if scalar.FuncName.L == ast.NE { + return isFullLength, !isFullLength } - return scalar.FuncName.L != ast.NE || c.length == types.UnspecifiedLength + return true, !isFullLength } } case ast.IsNull: - return c.checkColumn(scalar.GetArgs()[0]) + if c.checkColumn(scalar.GetArgs()[0]) { + // We can know whether the column is null from prefix column of any length. + return true, false + } + return false, true case ast.IsTruthWithoutNull, ast.IsFalsity, ast.IsTruthWithNull: if s, ok := scalar.GetArgs()[0].(*expression.Column); ok { if s.RetType.EvalType() == types.ETString { - return false + return false, true } } - return c.checkColumn(scalar.GetArgs()[0]) + if c.checkColumn(scalar.GetArgs()[0]) { + return true, !c.isFullLengthColumn() + } + return false, true case ast.UnaryNot: // TODO: support "not like" convert to access conditions. s, ok := scalar.GetArgs()[0].(*expression.ScalarFunction) if !ok { // "not column" or "not constant" can't lead to a range. - return false + return false, true } if s.FuncName.L == ast.Like || s.FuncName.L == ast.NullEQ { - return false + return false, true } return c.check(scalar.GetArgs()[0]) case ast.In: if !c.checkColumn(scalar.GetArgs()[0]) { - return false + return false, true } if scalar.GetArgs()[0].GetType().EvalType() == types.ETString && !collate.CompatibleCollate(scalar.GetArgs()[0].GetType().GetCollate(), collation) { - return false + return false, true } for _, v := range scalar.GetArgs()[1:] { if _, ok := v.(*expression.Constant); !ok { - return false + return false, true } } - return true + return true, !c.isFullLengthColumn() case ast.Like: return c.checkLikeFunc(scalar) case ast.GetParam: - return true + // TODO + return true, false } - return false + return false, true } -func (c *conditionChecker) checkLikeFunc(scalar *expression.ScalarFunction) bool { +func (c *conditionChecker) checkLikeFunc(scalar *expression.ScalarFunction) (isAccessCond, shouldReserve bool) { _, collation := scalar.CharsetAndCollation() if collate.NewCollationEnabled() && !collate.IsBinCollation(collation) { // The algorithm constructs the range in byte-level: for example, ab% is mapped to [ab, ac] by adding 1 to the last byte. @@ -120,29 +149,30 @@ func (c *conditionChecker) checkLikeFunc(scalar *expression.ScalarFunction) bool // Finally, the range comes to be [`, A], which is actually an empty range. // See https://github.com/pingcap/tidb/issues/31174 for more details. // In short, when the column type is non-binary collation string, we cannot use `like` expressions to generate the range. - return false + return false, true } if !collate.CompatibleCollate(scalar.GetArgs()[0].GetType().GetCollate(), collation) { - return false + return false, true } if !c.checkColumn(scalar.GetArgs()[0]) { - return false + return false, true } pattern, ok := scalar.GetArgs()[1].(*expression.Constant) if !ok { - return false + return false, true } if pattern.Value.IsNull() { - return false + return false, true } patternStr, err := pattern.Value.ToString() if err != nil { - return false + return false, true } if len(patternStr) == 0 { - return true + return true, !c.isFullLengthColumn() } escape := byte(scalar.GetArgs()[2].(*expression.Constant).Value.GetInt64()) + likeFuncReserve := !c.isFullLengthColumn() for i := 0; i < len(patternStr); i++ { if patternStr[i] == escape { i++ @@ -152,16 +182,16 @@ func (c *conditionChecker) checkLikeFunc(scalar *expression.ScalarFunction) bool break } if i == 0 && (patternStr[i] == '%' || patternStr[i] == '_') { - return false + return false, true } if patternStr[i] == '%' { // We currently do not support using `enum like 'xxx%'` to build range // see https://github.com/pingcap/tidb/issues/27130 for more details if scalar.GetArgs()[0].GetType().GetType() == mysql.TypeEnum { - return false + return false, true } if i != len(patternStr)-1 { - c.shouldReserve = true + likeFuncReserve = true } break } @@ -169,23 +199,19 @@ func (c *conditionChecker) checkLikeFunc(scalar *expression.ScalarFunction) bool // We currently do not support using `enum like 'xxx_'` to build range // see https://github.com/pingcap/tidb/issues/27130 for more details if scalar.GetArgs()[0].GetType().GetType() == mysql.TypeEnum { - return false + return false, true } - c.shouldReserve = true + likeFuncReserve = true break } } - return true + return true, likeFuncReserve } func (c *conditionChecker) checkColumn(expr expression.Expression) bool { - col, ok := expr.(*expression.Column) - if !ok { - return false - } // Check if virtual expression column matched if c.checkerCol != nil { - return c.checkerCol.EqualByExprAndID(nil, col) + return c.checkerCol.EqualByExprAndID(nil, expr) } - return c.colUniqueID == col.UniqueID + return false } diff --git a/util/ranger/detacher.go b/util/ranger/detacher.go index cd47c50d3cce0..0ab25fbfc542a 100644 --- a/util/ranger/detacher.go +++ b/util/ranger/detacher.go @@ -49,14 +49,14 @@ func detachColumnCNFConditions(sctx sessionctx.Context, conditions []expression. accessConditions = append(accessConditions, rebuildDNF) continue } - if !checker.check(cond) { + isAccessCond, shouldReserve := checker.check(cond) + if !isAccessCond { filterConditions = append(filterConditions, cond) continue } accessConditions = append(accessConditions, cond) - if checker.shouldReserve { + if shouldReserve { filterConditions = append(filterConditions, cond) - checker.shouldReserve = checker.length != types.UnspecifiedLength } } return accessConditions, filterConditions @@ -82,13 +82,14 @@ func detachColumnDNFConditions(sctx sessionctx.Context, conditions []expression. } rebuildCNF := expression.ComposeCNFCondition(sctx, columnCNFItems...) accessConditions = append(accessConditions, rebuildCNF) - } else if !checker.check(cond) { - return nil, true } else { + isAccessCond, shouldReserve := checker.check(cond) + if !isAccessCond { + return nil, true + } accessConditions = append(accessConditions, cond) - if checker.shouldReserve { + if shouldReserve { hasResidualConditions = true - checker.shouldReserve = checker.length != types.UnspecifiedLength } } } @@ -331,9 +332,8 @@ func (d *rangeDetacher) detachCNFCondAndBuildRangeForIndex(conditions []expressi return res, nil } checker := &conditionChecker{ - checkerCol: d.cols[eqOrInCount], - length: d.lengths[eqOrInCount], - shouldReserve: d.lengths[eqOrInCount] != types.UnspecifiedLength, + checkerCol: d.cols[eqOrInCount], + length: d.lengths[eqOrInCount], } if considerDNF { pointRes, offset, columnValues, err := extractIndexPointRangesForCNF(d.sctx, conditions, d.cols, d.lengths, d.rangeMaxSize) @@ -410,7 +410,8 @@ func (d *rangeDetacher) detachCNFCondAndBuildRangeForIndex(conditions []expressi return res, nil } for _, cond := range newConditions { - if !checker.check(cond) { + isAccessCond, _ := checker.check(cond) + if !isAccessCond { filterConds = append(filterConds, cond) continue } @@ -642,7 +643,8 @@ func ExtractEqAndInCondition(sctx sessionctx.Context, conditions []expression.Ex // However, please notice that if you're implementing this, please (1) set StatementContext.OptimDependOnMutableConst to true, // or (2) don't do this optimization when StatementContext.UseCache is true. That's because this plan is affected by // flen of user variable, we cannot cache this plan. - if lengths[i] != types.UnspecifiedLength { + isFullLength := lengths[i] == types.UnspecifiedLength || lengths[i] == cols[i].GetType().GetFlen() + if !isFullLength { filters = append(filters, cond) } } @@ -655,9 +657,8 @@ func ExtractEqAndInCondition(sctx sessionctx.Context, conditions []expression.Ex // We will detach the conditions of every DNF items, then compose them to a DNF. func (d *rangeDetacher) detachDNFCondAndBuildRangeForIndex(condition *expression.ScalarFunction, newTpSlice []*types.FieldType) (Ranges, []expression.Expression, []*valueInfo, bool, error) { firstColumnChecker := &conditionChecker{ - checkerCol: d.cols[0], - shouldReserve: d.lengths[0] != types.UnspecifiedLength, - length: d.lengths[0], + checkerCol: d.cols[0], + length: d.lengths[0], } rb := builder{sc: d.sctx.GetSessionVars().StmtCtx} dnfItems := expression.FlattenDNFConditions(condition) @@ -709,12 +710,13 @@ func (d *rangeDetacher) detachDNFCondAndBuildRangeForIndex(condition *expression } } } - } else if !firstColumnChecker.check(item) { - return FullRange(), nil, nil, true, nil } else { - if firstColumnChecker.shouldReserve { + isAccessCond, shouldReserve := firstColumnChecker.check(item) + if !isAccessCond { + return FullRange(), nil, nil, true, nil + } + if shouldReserve { hasResidual = true - firstColumnChecker.shouldReserve = d.lengths[0] != types.UnspecifiedLength } points := rb.build(item, collate.GetCollator(newTpSlice[0].GetCollate())) // TODO: restrict the mem usage of ranges @@ -923,7 +925,11 @@ func ExtractAccessConditionsForColumn(conds []expression.Expression, col *expres length: types.UnspecifiedLength, } accessConds := make([]expression.Expression, 0, 8) - return expression.Filter(accessConds, conds, checker.check) + filter := func(expr expression.Expression) bool { + isAccessCond, _ := checker.check(expr) + return isAccessCond + } + return expression.Filter(accessConds, conds, filter) } // DetachCondsForColumn detaches access conditions for specified column from other filter conditions. @@ -958,7 +964,8 @@ func MergeDNFItems4Col(ctx sessionctx.Context, dnfItems []expression.Expression) // Currently, we assume if every condition in a DNF expression can pass this check, then `Selectivity` must be able to // cover this entire DNF directly without recursively call `Selectivity`. If this doesn't hold in the future, this logic // may cause infinite recursion in `Selectivity`. - if !checker.check(dnfItem) { + isAccessCond, _ := checker.check(dnfItem) + if !isAccessCond { mergedDNFItems = append(mergedDNFItems, dnfItem) continue } From c466623bc7ed60b6f023d7e362fb84860db5be3c Mon Sep 17 00:00:00 2001 From: xuyifan <675434007@qq.com> Date: Wed, 19 Oct 2022 22:47:29 +0800 Subject: [PATCH 02/21] upd --- planner/core/logical_plans.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/planner/core/logical_plans.go b/planner/core/logical_plans.go index acfeaec9f138a..af3bcc1fef315 100644 --- a/planner/core/logical_plans.go +++ b/planner/core/logical_plans.go @@ -1344,7 +1344,7 @@ func (ds *DataSource) Convert2Gathers() (gathers []LogicalPlan) { path.FullIdxCols, path.FullIdxColLens = expression.IndexInfo2Cols(ds.Columns, ds.schema.Columns, path.Index) path.IdxCols, path.IdxColLens = expression.IndexInfo2PrefixCols(ds.Columns, ds.schema.Columns, path.Index) // If index columns can cover all of the needed columns, we can use a IndexGather + IndexScan. - if ds.indexCoveringColumns(ds.schema.Columns, path.FullIdxCols, path.FullIdxColLens) { + if ds.isSingleScan(path.FullIdxCols, path.FullIdxColLens) { gathers = append(gathers, ds.buildIndexGather(path)) } // TODO: If index columns can not cover the schema, use IndexLookUpGather. From 40b40abcb1f34554cf81a0fba12f4c39a28dade7 Mon Sep 17 00:00:00 2001 From: xuyifan <675434007@qq.com> Date: Thu, 20 Oct 2022 00:09:04 +0800 Subject: [PATCH 03/21] fix ut --- planner/core/find_best_task.go | 2 ++ 1 file changed, 2 insertions(+) diff --git a/planner/core/find_best_task.go b/planner/core/find_best_task.go index a74b14e412dd0..037221faebc5d 100644 --- a/planner/core/find_best_task.go +++ b/planner/core/find_best_task.go @@ -1340,6 +1340,8 @@ func (ds *DataSource) indexCoveringCondition(condition expression.Expression, in return false } return ds.indexCoveringColumn(col, indexColumns, idxColLens, true) + default: + return ds.indexCoveringColumns(expression.ExtractColumns(v), indexColumns, idxColLens) } } return true From 71da735a2cc2a3afb1a2b632e668aa477cdf096f Mon Sep 17 00:00:00 2001 From: xuyifan <675434007@qq.com> Date: Thu, 20 Oct 2022 13:19:51 +0800 Subject: [PATCH 04/21] fix ut --- planner/core/find_best_task.go | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/planner/core/find_best_task.go b/planner/core/find_best_task.go index 037221faebc5d..153410e86d0f7 100644 --- a/planner/core/find_best_task.go +++ b/planner/core/find_best_task.go @@ -1348,6 +1348,11 @@ func (ds *DataSource) indexCoveringCondition(condition expression.Expression, in } func (ds *DataSource) isSingleScan(indexColumns []*expression.Column, idxColLens []int) bool { + if ds.colsRequiringFullLen == nil { + // ds.colsRequiringFullLen is set at (*DataSource).PruneColumns. In some cases we don't reach (*DataSource).PruneColumns + // and ds.colsRequiringFullLen is nil, so we fall back to ds.indexCoveringColumns(ds.schema.Columns, indexColumns, idxColLens). + return ds.indexCoveringColumns(ds.schema.Columns, indexColumns, idxColLens) + } if !ds.indexCoveringColumns(ds.colsRequiringFullLen, indexColumns, idxColLens) { return false } From 33c718275486152733bf3abaca26a8db2f54fb00 Mon Sep 17 00:00:00 2001 From: xuyifan <675434007@qq.com> Date: Thu, 20 Oct 2022 16:10:14 +0800 Subject: [PATCH 05/21] fix ut --- .../core/testdata/integration_suite_out.json | 24 +++++++++---------- 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/planner/core/testdata/integration_suite_out.json b/planner/core/testdata/integration_suite_out.json index 4ffc80ce82787..c875a3e728942 100644 --- a/planner/core/testdata/integration_suite_out.json +++ b/planner/core/testdata/integration_suite_out.json @@ -7407,10 +7407,10 @@ "│ └─Selection 9980.01 cop[tikv] not(isnull(test.t2.e)), not(isnull(test.t2.g))", "│ └─TableFullScan 10000.00 cop[tikv] table:t2 keep order:false, stats:pseudo", "└─IndexLookUp(Probe) 0.50 root ", - " ├─Selection(Build) 249.75 cop[tikv] not(isnull(test.t1.b))", + " ├─Selection(Build) 249.50 cop[tikv] not(isnull(test.t1.b)), not(isnull(test.t1.d))", " │ └─IndexRangeScan 250.00 cop[tikv] table:t1, index:idx_a_b_c_d(a, b, c, d) range: decided by [eq(test.t1.b, test.t2.e) eq(test.t1.d, test.t2.g) in(test.t1.a, 1, 3) in(test.t1.c, aaa, bbb)], keep order:false, stats:pseudo", - " └─Selection(Probe) 0.50 cop[tikv] in(test.t1.c, \"aaa\", \"bbb\"), not(isnull(test.t1.d))", - " └─TableRowIDScan 249.75 cop[tikv] table:t1 keep order:false, stats:pseudo" + " └─Selection(Probe) 0.50 cop[tikv] in(test.t1.c, \"aaa\", \"bbb\")", + " └─TableRowIDScan 249.50 cop[tikv] table:t1 keep order:false, stats:pseudo" ], "Warn": null }, @@ -7427,10 +7427,10 @@ "│ └─Selection 9980.01 cop[tikv] not(isnull(test.t2.e)), not(isnull(test.t2.g))", "│ └─TableFullScan 10000.00 cop[tikv] table:t2 keep order:false, stats:pseudo", "└─IndexLookUp(Probe) 0.50 root ", - " ├─Selection(Build) 249.75 cop[tikv] not(isnull(test.t1.b))", + " ├─Selection(Build) 249.50 cop[tikv] not(isnull(test.t1.b)), not(isnull(test.t1.d))", " │ └─IndexRangeScan 250.00 cop[tikv] table:t1, index:idx_a_b_c_d(a, b, c, d) range: decided by [eq(test.t1.b, test.t2.e) in(test.t1.a, 1, 3) in(test.t1.c, aaa, bbb)], keep order:false, stats:pseudo", - " └─Selection(Probe) 0.50 cop[tikv] in(test.t1.c, \"aaa\", \"bbb\"), not(isnull(test.t1.d))", - " └─TableRowIDScan 249.75 cop[tikv] table:t1 keep order:false, stats:pseudo" + " └─Selection(Probe) 0.50 cop[tikv] in(test.t1.c, \"aaa\", \"bbb\")", + " └─TableRowIDScan 249.50 cop[tikv] table:t1 keep order:false, stats:pseudo" ], "Warn": [ "Memory capacity of 2900 bytes for 'tidb_opt_range_max_size' exceeded when building ranges. Less accurate ranges such as full range are chosen" @@ -7449,10 +7449,10 @@ "│ └─Selection 9980.01 cop[tikv] not(isnull(test.t2.e)), not(isnull(test.t2.g))", "│ └─TableFullScan 10000.00 cop[tikv] table:t2 keep order:false, stats:pseudo", "└─IndexLookUp(Probe) 0.50 root ", - " ├─Selection(Build) 249.75 cop[tikv] not(isnull(test.t1.b))", + " ├─Selection(Build) 249.50 cop[tikv] not(isnull(test.t1.b)), not(isnull(test.t1.d))", " │ └─IndexRangeScan 250.00 cop[tikv] table:t1, index:idx_a_b_c_d(a, b, c, d) range: decided by [eq(test.t1.b, test.t2.e) in(test.t1.a, 1, 3)], keep order:false, stats:pseudo", - " └─Selection(Probe) 0.50 cop[tikv] in(test.t1.c, \"aaa\", \"bbb\"), not(isnull(test.t1.d))", - " └─TableRowIDScan 249.75 cop[tikv] table:t1 keep order:false, stats:pseudo" + " └─Selection(Probe) 0.50 cop[tikv] in(test.t1.c, \"aaa\", \"bbb\")", + " └─TableRowIDScan 249.50 cop[tikv] table:t1 keep order:false, stats:pseudo" ], "Warn": [ "Memory capacity of 2300 bytes for 'tidb_opt_range_max_size' exceeded when building ranges. Less accurate ranges such as full range are chosen" @@ -7468,10 +7468,10 @@ "Plan": [ "HashJoin 0.05 root inner join, equal:[eq(test.t1.b, test.t2.e) eq(test.t1.d, test.t2.g)]", "├─IndexLookUp(Build) 0.04 root ", - "│ ├─Selection(Build) 19.98 cop[tikv] not(isnull(test.t1.b))", + "│ ├─Selection(Build) 19.96 cop[tikv] not(isnull(test.t1.b)), not(isnull(test.t1.d))", "│ │ └─IndexRangeScan 20.00 cop[tikv] table:t1, index:idx_a_b_c_d(a, b, c, d) range:[1,1], [3,3], keep order:false, stats:pseudo", - "│ └─Selection(Probe) 0.04 cop[tikv] in(test.t1.c, \"aaa\", \"bbb\"), not(isnull(test.t1.d))", - "│ └─TableRowIDScan 19.98 cop[tikv] table:t1 keep order:false, stats:pseudo", + "│ └─Selection(Probe) 0.04 cop[tikv] in(test.t1.c, \"aaa\", \"bbb\")", + "│ └─TableRowIDScan 19.96 cop[tikv] table:t1 keep order:false, stats:pseudo", "└─TableReader(Probe) 9980.01 root data:Selection", " └─Selection 9980.01 cop[tikv] not(isnull(test.t2.e)), not(isnull(test.t2.g))", " └─TableFullScan 10000.00 cop[tikv] table:t2 keep order:false, stats:pseudo" From e2be1e59c8cabd1f2cfa949437337c4eb93a56a3 Mon Sep 17 00:00:00 2001 From: xuyifan <675434007@qq.com> Date: Thu, 20 Oct 2022 17:36:00 +0800 Subject: [PATCH 06/21] add variable tidb_prefer_prefix_index_single_scan --- executor/set_test.go | 14 +++++++++++++ planner/core/find_best_task.go | 10 +++++++-- planner/core/logical_plan_trace_test.go | 2 ++ sessionctx/variable/session.go | 4 ++++ sessionctx/variable/sysvar.go | 4 ++++ sessionctx/variable/tidb_vars.go | 5 +++++ statistics/selectivity.go | 2 +- util/ranger/checker.go | 12 +++++++---- util/ranger/detacher.go | 27 +++++++++++++++---------- util/ranger/ranger_test.go | 2 +- 10 files changed, 63 insertions(+), 19 deletions(-) diff --git a/executor/set_test.go b/executor/set_test.go index 0816dfb74df97..4c617a8dc478c 100644 --- a/executor/set_test.go +++ b/executor/set_test.go @@ -829,6 +829,20 @@ func TestSetVar(t *testing.T) { tk.MustQuery("select @@global.tidb_auto_analyze_partition_batch_size").Check(testkit.Rows("1")) // min value is 1 tk.MustExec("set global tidb_auto_analyze_partition_batch_size = 9999") tk.MustQuery("select @@global.tidb_auto_analyze_partition_batch_size").Check(testkit.Rows("1024")) // max value is 1024 + + // test variable 'tidb_prefer_prefix_index_single_scan' + // global scope + tk.MustQuery("select @@global.tidb_prefer_prefix_index_single_scan").Check(testkit.Rows("1")) // default value + tk.MustExec("set global tidb_prefer_prefix_index_single_scan = 0") + tk.MustQuery("select @@global.tidb_prefer_prefix_index_single_scan").Check(testkit.Rows("0")) + tk.MustExec("set global tidb_prefer_prefix_index_single_scan = 1") + tk.MustQuery("select @@global.tidb_prefer_prefix_index_single_scan").Check(testkit.Rows("1")) + // session scope + tk.MustQuery("select @@session.tidb_prefer_prefix_index_single_scan").Check(testkit.Rows("1")) // default value + tk.MustExec("set session tidb_prefer_prefix_index_single_scan = 0") + tk.MustQuery("select @@session.tidb_prefer_prefix_index_single_scan").Check(testkit.Rows("0")) + tk.MustExec("set session tidb_prefer_prefix_index_single_scan = 1") + tk.MustQuery("select @@session.tidb_prefer_prefix_index_single_scan").Check(testkit.Rows("1")) } func TestGetSetNoopVars(t *testing.T) { diff --git a/planner/core/find_best_task.go b/planner/core/find_best_task.go index 153410e86d0f7..36108fc1178be 100644 --- a/planner/core/find_best_task.go +++ b/planner/core/find_best_task.go @@ -1348,7 +1348,7 @@ func (ds *DataSource) indexCoveringCondition(condition expression.Expression, in } func (ds *DataSource) isSingleScan(indexColumns []*expression.Column, idxColLens []int) bool { - if ds.colsRequiringFullLen == nil { + if !ds.ctx.GetSessionVars().PreferPrefixIndexSingleScan || ds.colsRequiringFullLen == nil { // ds.colsRequiringFullLen is set at (*DataSource).PruneColumns. In some cases we don't reach (*DataSource).PruneColumns // and ds.colsRequiringFullLen is nil, so we fall back to ds.indexCoveringColumns(ds.schema.Columns, indexColumns, idxColLens). return ds.indexCoveringColumns(ds.schema.Columns, indexColumns, idxColLens) @@ -1629,7 +1629,13 @@ func (ds *DataSource) splitIndexFilterConditions(conditions []expression.Express idxColLens []int) (indexConds, tableConds []expression.Expression) { var indexConditions, tableConditions []expression.Expression for _, cond := range conditions { - if ds.indexCoveringCondition(cond, indexColumns, idxColLens) { + var covered bool + if ds.ctx.GetSessionVars().PreferPrefixIndexSingleScan { + covered = ds.indexCoveringCondition(cond, indexColumns, idxColLens) + } else { + covered = ds.indexCoveringColumns(expression.ExtractColumns(cond), indexColumns, idxColLens) + } + if covered { indexConditions = append(indexConditions, cond) } else { tableConditions = append(tableConditions, cond) diff --git a/planner/core/logical_plan_trace_test.go b/planner/core/logical_plan_trace_test.go index 7233b49cb24e1..88e3f9059e910 100644 --- a/planner/core/logical_plan_trace_test.go +++ b/planner/core/logical_plan_trace_test.go @@ -396,6 +396,7 @@ func TestSingleRuleTraceStep(t *testing.T) { s := createPlannerSuite() for i, tc := range tt { sql := tc.sql + fmt.Printf("\nsql:%v\nflags:%v\nassertRuleName:%v\nassertRuleSteps:%v\n", tc.sql, tc.flags, tc.assertRuleName, tc.assertRuleSteps) comment := fmt.Sprintf("case:%v sql:%s", i, sql) stmt, err := s.p.ParseOneStmt(sql, "", "") require.NoError(t, err, comment) @@ -419,6 +420,7 @@ func TestSingleRuleTraceStep(t *testing.T) { require.NotNil(t, trace, comment) assert := false for _, step := range trace.Steps { + fmt.Printf("trace step:%v\n", step.RuleName) if step.RuleName == tc.assertRuleName { assert = true for i, ruleStep := range step.Steps { diff --git a/sessionctx/variable/session.go b/sessionctx/variable/session.go index 12841c40a67f5..da3909e36e507 100644 --- a/sessionctx/variable/session.go +++ b/sessionctx/variable/session.go @@ -1275,6 +1275,10 @@ type SessionVars struct { AnalyzePartitionMergeConcurrency int HookContext + + // PreferPrefixIndexSingleScan indicates whether to do some optimizations to avoid double scan for prefix index. + // When set to true, `col is (not) null`(`col` is index prefix column) is regarded as index filter rather than table filter. + PreferPrefixIndexSingleScan bool } // GetPreparedStmtByName returns the prepared statement specified by stmtName. diff --git a/sessionctx/variable/sysvar.go b/sessionctx/variable/sysvar.go index a95d497280f04..54e85b85922c7 100644 --- a/sessionctx/variable/sysvar.go +++ b/sessionctx/variable/sysvar.go @@ -1924,6 +1924,10 @@ var defaultSysVars = []*SysVar{ return nil }, }, + {Scope: ScopeGlobal | ScopeSession, Name: TiDBPreferPrefixIndexSingleScan, Value: BoolToOnOff(DefTiDBPreferPrefixIndexSingleScan), Type: TypeBool, SetSession: func(s *SessionVars, val string) error { + s.PreferPrefixIndexSingleScan = TiDBOptOn(val) + return nil + }}, } // FeedbackProbability points to the FeedbackProbability in statistics package. diff --git a/sessionctx/variable/tidb_vars.go b/sessionctx/variable/tidb_vars.go index 38fecce6f95f4..4b8da849205b8 100644 --- a/sessionctx/variable/tidb_vars.go +++ b/sessionctx/variable/tidb_vars.go @@ -752,6 +752,10 @@ const ( // TiDBMergePartitionStatsConcurrency indicates the concurrecny when merge partition stats into global stats TiDBMergePartitionStatsConcurrency = "tidb_merge_partition_stats_concurrency" + + // TiDBPreferPrefixIndexSingleScan indicates whether to do some optimizations to avoid double scan for prefix index. + // When set to true, `col is (not) null`(`col` is index prefix column) is regarded as index filter rather than table filter. + TiDBPreferPrefixIndexSingleScan = "tidb_prefer_prefix_index_single_scan" ) // TiDB vars that have only global scope @@ -1063,6 +1067,7 @@ const ( DefTiDBMergePartitionStatsConcurrency = 1 DefTiDBServerMemoryLimitGCTrigger = 0.7 DefTiDBEnableGOGCTuner = true + DefTiDBPreferPrefixIndexSingleScan = true ) // Process global variables. diff --git a/statistics/selectivity.go b/statistics/selectivity.go index 45de31365f3d6..99458cc04bea6 100644 --- a/statistics/selectivity.go +++ b/statistics/selectivity.go @@ -498,7 +498,7 @@ func getMaskAndRanges(ctx sessionctx.Context, exprs []expression.Expression, ran var accessConds, remainedConds []expression.Expression switch rangeType { case ranger.ColumnRangeType: - accessConds = ranger.ExtractAccessConditionsForColumn(exprs, cols[0]) + accessConds = ranger.ExtractAccessConditionsForColumn(ctx, exprs, cols[0]) ranges, accessConds, _, err = ranger.BuildColumnRange(accessConds, ctx, cols[0].RetType, types.UnspecifiedLength, ctx.GetSessionVars().RangeMaxSize) case ranger.IndexRangeType: if cachedPath != nil { diff --git a/util/ranger/checker.go b/util/ranger/checker.go index 7d43497bd5109..9781ced9a992d 100644 --- a/util/ranger/checker.go +++ b/util/ranger/checker.go @@ -24,8 +24,9 @@ import ( // conditionChecker checks if this condition can be pushed to index planner. type conditionChecker struct { - checkerCol *expression.Column - length int + checkerCol *expression.Column + length int + preferPrefixIndexSingleScan bool } func (c *conditionChecker) isFullLengthColumn() bool { @@ -92,8 +93,11 @@ func (c *conditionChecker) checkScalarFunction(scalar *expression.ScalarFunction } case ast.IsNull: if c.checkColumn(scalar.GetArgs()[0]) { - // We can know whether the column is null from prefix column of any length. - return true, false + var isNullReserve bool // We can know whether the column is null from prefix column of any length. + if !c.preferPrefixIndexSingleScan { + isNullReserve = !c.isFullLengthColumn() + } + return true, isNullReserve } return false, true case ast.IsTruthWithoutNull, ast.IsFalsity, ast.IsTruthWithNull: diff --git a/util/ranger/detacher.go b/util/ranger/detacher.go index 0ab25fbfc542a..c447b1507f18d 100644 --- a/util/ranger/detacher.go +++ b/util/ranger/detacher.go @@ -332,8 +332,9 @@ func (d *rangeDetacher) detachCNFCondAndBuildRangeForIndex(conditions []expressi return res, nil } checker := &conditionChecker{ - checkerCol: d.cols[eqOrInCount], - length: d.lengths[eqOrInCount], + checkerCol: d.cols[eqOrInCount], + length: d.lengths[eqOrInCount], + preferPrefixIndexSingleScan: d.sctx.GetSessionVars().PreferPrefixIndexSingleScan, } if considerDNF { pointRes, offset, columnValues, err := extractIndexPointRangesForCNF(d.sctx, conditions, d.cols, d.lengths, d.rangeMaxSize) @@ -657,8 +658,9 @@ func ExtractEqAndInCondition(sctx sessionctx.Context, conditions []expression.Ex // We will detach the conditions of every DNF items, then compose them to a DNF. func (d *rangeDetacher) detachDNFCondAndBuildRangeForIndex(condition *expression.ScalarFunction, newTpSlice []*types.FieldType) (Ranges, []expression.Expression, []*valueInfo, bool, error) { firstColumnChecker := &conditionChecker{ - checkerCol: d.cols[0], - length: d.lengths[0], + checkerCol: d.cols[0], + length: d.lengths[0], + preferPrefixIndexSingleScan: d.sctx.GetSessionVars().PreferPrefixIndexSingleScan, } rb := builder{sc: d.sctx.GetSessionVars().StmtCtx} dnfItems := expression.FlattenDNFConditions(condition) @@ -919,10 +921,11 @@ func AppendConditionsIfNotExist(conditions, condsToAppend []expression.Expressio // ExtractAccessConditionsForColumn extracts the access conditions used for range calculation. Since // we don't need to return the remained filter conditions, it is much simpler than DetachCondsForColumn. -func ExtractAccessConditionsForColumn(conds []expression.Expression, col *expression.Column) []expression.Expression { +func ExtractAccessConditionsForColumn(ctx sessionctx.Context, conds []expression.Expression, col *expression.Column) []expression.Expression { checker := conditionChecker{ - checkerCol: col, - length: types.UnspecifiedLength, + checkerCol: col, + length: types.UnspecifiedLength, + preferPrefixIndexSingleScan: ctx.GetSessionVars().PreferPrefixIndexSingleScan, } accessConds := make([]expression.Expression, 0, 8) filter := func(expr expression.Expression) bool { @@ -935,8 +938,9 @@ func ExtractAccessConditionsForColumn(conds []expression.Expression, col *expres // DetachCondsForColumn detaches access conditions for specified column from other filter conditions. func DetachCondsForColumn(sctx sessionctx.Context, conds []expression.Expression, col *expression.Column) (accessConditions, otherConditions []expression.Expression) { checker := &conditionChecker{ - checkerCol: col, - length: types.UnspecifiedLength, + checkerCol: col, + length: types.UnspecifiedLength, + preferPrefixIndexSingleScan: sctx.GetSessionVars().PreferPrefixIndexSingleScan, } return detachColumnCNFConditions(sctx, conds, checker) } @@ -957,8 +961,9 @@ func MergeDNFItems4Col(ctx sessionctx.Context, dnfItems []expression.Expression) uniqueID := cols[0].UniqueID checker := &conditionChecker{ - checkerCol: cols[0], - length: types.UnspecifiedLength, + checkerCol: cols[0], + length: types.UnspecifiedLength, + preferPrefixIndexSingleScan: ctx.GetSessionVars().PreferPrefixIndexSingleScan, } // If we can't use this condition to build range, we can't merge it. // Currently, we assume if every condition in a DNF expression can pass this check, then `Selectivity` must be able to diff --git a/util/ranger/ranger_test.go b/util/ranger/ranger_test.go index 411abe846499b..ac767c7f54f38 100644 --- a/util/ranger/ranger_test.go +++ b/util/ranger/ranger_test.go @@ -827,7 +827,7 @@ func TestColumnRange(t *testing.T) { } col := expression.ColInfo2Col(sel.Schema().Columns, ds.TableInfo().Columns[tt.colPos]) require.NotNil(t, col) - conds = ranger.ExtractAccessConditionsForColumn(conds, col) + conds = ranger.ExtractAccessConditionsForColumn(sctx, conds, col) require.Equal(t, tt.accessConds, fmt.Sprintf("%s", conds)) result, _, _, err := ranger.BuildColumnRange(conds, sctx, col.RetType, tt.length, 0) require.NoError(t, err) From 0e2297d33d8a94f369d195459d7d32c5d1c5ded5 Mon Sep 17 00:00:00 2001 From: xuyifan <675434007@qq.com> Date: Thu, 20 Oct 2022 18:06:29 +0800 Subject: [PATCH 07/21] debug TestSingleRuleTraceStep --- planner/core/logical_plan_trace_test.go | 1 + 1 file changed, 1 insertion(+) diff --git a/planner/core/logical_plan_trace_test.go b/planner/core/logical_plan_trace_test.go index 88e3f9059e910..36a1ffe586e27 100644 --- a/planner/core/logical_plan_trace_test.go +++ b/planner/core/logical_plan_trace_test.go @@ -403,6 +403,7 @@ func TestSingleRuleTraceStep(t *testing.T) { err = Preprocess(context.Background(), s.ctx, stmt, WithPreprocessorReturn(&PreprocessorReturn{InfoSchema: s.is})) require.NoError(t, err, comment) sctx := MockContext() + sctx.GetSessionVars().PreferPrefixIndexSingleScan = false sctx.GetSessionVars().StmtCtx.EnableOptimizeTrace = true sctx.GetSessionVars().AllowAggPushDown = true builder, _ := NewPlanBuilder().Init(sctx, s.is, &hint.BlockHintProcessor{}) From d2a31d4f45c576e51517bac20020e4ccd0df3b41 Mon Sep 17 00:00:00 2001 From: xuyifan <675434007@qq.com> Date: Thu, 20 Oct 2022 18:24:29 +0800 Subject: [PATCH 08/21] debug 2 --- planner/core/optimizer.go | 2 ++ 1 file changed, 2 insertions(+) diff --git a/planner/core/optimizer.go b/planner/core/optimizer.go index 4623af7261104..731830ca40fa8 100644 --- a/planner/core/optimizer.go +++ b/planner/core/optimizer.go @@ -16,6 +16,7 @@ package core import ( "context" + "fmt" "math" "github.com/pingcap/errors" @@ -650,6 +651,7 @@ func logicalOptimize(ctx context.Context, flag uint64, logic LogicalPlan) (Logic func isLogicalRuleDisabled(r logicalOptRule) bool { disabled := DefaultDisabledLogicalRulesList.Load().(set.StringSet).Exist(r.name()) + logutil.BgLogger().Info(fmt.Sprintf("rule:%v, disabled:%v", r.name(), disabled)) return disabled } From a9018a22f7ce5b46afc1022753ff24e994d07eed Mon Sep 17 00:00:00 2001 From: xuyifan <675434007@qq.com> Date: Thu, 20 Oct 2022 20:51:19 +0800 Subject: [PATCH 09/21] fix --- planner/core/integration_test.go | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/planner/core/integration_test.go b/planner/core/integration_test.go index 0a06fe30058a9..2d661aa56f23e 100644 --- a/planner/core/integration_test.go +++ b/planner/core/integration_test.go @@ -7528,6 +7528,10 @@ func TestCorColRangeWithRangeMaxSize(t *testing.T) { tk.MustExec("insert into t3 values (2), (4)") tk.MustExec("insert into mysql.opt_rule_blacklist value(\"decorrelate\")") tk.MustExec("admin reload opt_rule_blacklist") + defer func() { + tk.MustExec("delete from mysql.opt_rule_blacklist where name = \"decorrelate\"") + tk.MustExec("admin reload opt_rule_blacklist") + }() // Correlated column in index range. tk.MustExec("set @@tidb_opt_range_max_size=1000") From bcf6a3b8b81f1f16ded3b89a0ac5d46edac01a32 Mon Sep 17 00:00:00 2001 From: xuyifan <675434007@qq.com> Date: Thu, 20 Oct 2022 21:19:35 +0800 Subject: [PATCH 10/21] fix test --- planner/core/integration_test.go | 30 +++++++---- planner/core/logical_plan_trace_test.go | 2 - planner/core/optimizer.go | 2 - .../core/testdata/integration_suite_in.json | 9 ++++ .../core/testdata/integration_suite_out.json | 54 +++++++++++++++++++ 5 files changed, 82 insertions(+), 15 deletions(-) diff --git a/planner/core/integration_test.go b/planner/core/integration_test.go index 2d661aa56f23e..ac3e8c6c2e230 100644 --- a/planner/core/integration_test.go +++ b/planner/core/integration_test.go @@ -7709,16 +7709,24 @@ func TestAvoidDoubleScanForPrefixIndex(t *testing.T) { KEY idx1 (c1), KEY idx2 (c1,c2(5)) ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_bin`) + tk.MustExec("set tidb_prefer_prefix_index_single_scan = 1") tk.MustExec("insert into t1 values ('a', '0xfff', '111111'), ('b', '0xfff', '222222'), ('c', '0xfff', null)") - tk.MustQuery("explain format='brief' select count(1) from t1 where c1 = '0xfff' and c2 is not null").Check(testkit.Rows( - "StreamAgg 1.00 root funcs:count(Column#7)->Column#5", - "└─IndexReader 1.00 root index:StreamAgg", - " └─StreamAgg 1.00 cop[tikv] funcs:count(1)->Column#7", - " └─IndexRangeScan 99.90 cop[tikv] table:t1, index:idx2(c1, c2) range:[\"0xfff\" -inf,\"0xfff\" +inf], keep order:false, stats:pseudo")) - tk.MustQuery("select count(1) from t1 where c1 = '0xfff' and c2 is not null").Check(testkit.Rows("2")) - tk.MustQuery("explain format='brief' select count(1) from t1 where c1 = '0xfff' and c2 is null").Check(testkit.Rows( - "StreamAgg 1.00 root funcs:count(1)->Column#5", - "└─IndexReader 0.10 root index:IndexRangeScan", - " └─IndexRangeScan 0.10 cop[tikv] table:t1, index:idx2(c1, c2) range:[\"0xfff\" NULL,\"0xfff\" NULL], keep order:false, stats:pseudo")) - tk.MustQuery("select count(1) from t1 where c1 = '0xfff' and c2 is null").Check(testkit.Rows("1")) + + var input []string + var output []struct { + SQL string + Plan []string + Result []string + } + integrationSuiteData := core.GetIntegrationSuiteData() + integrationSuiteData.LoadTestCases(t, &input, &output) + for i, tt := range input { + testdata.OnRecord(func() { + output[i].SQL = tt + output[i].Plan = testdata.ConvertRowsToStrings(tk.MustQuery("explain format='brief' " + tt).Rows()) + output[i].Result = testdata.ConvertRowsToStrings(tk.MustQuery(tt).Rows()) + }) + tk.MustQuery("explain format='brief' " + tt).Check(testkit.Rows(output[i].Plan...)) + tk.MustQuery(tt).Check(testkit.Rows(output[i].Result...)) + } } diff --git a/planner/core/logical_plan_trace_test.go b/planner/core/logical_plan_trace_test.go index 36a1ffe586e27..f28b5814c4098 100644 --- a/planner/core/logical_plan_trace_test.go +++ b/planner/core/logical_plan_trace_test.go @@ -396,7 +396,6 @@ func TestSingleRuleTraceStep(t *testing.T) { s := createPlannerSuite() for i, tc := range tt { sql := tc.sql - fmt.Printf("\nsql:%v\nflags:%v\nassertRuleName:%v\nassertRuleSteps:%v\n", tc.sql, tc.flags, tc.assertRuleName, tc.assertRuleSteps) comment := fmt.Sprintf("case:%v sql:%s", i, sql) stmt, err := s.p.ParseOneStmt(sql, "", "") require.NoError(t, err, comment) @@ -421,7 +420,6 @@ func TestSingleRuleTraceStep(t *testing.T) { require.NotNil(t, trace, comment) assert := false for _, step := range trace.Steps { - fmt.Printf("trace step:%v\n", step.RuleName) if step.RuleName == tc.assertRuleName { assert = true for i, ruleStep := range step.Steps { diff --git a/planner/core/optimizer.go b/planner/core/optimizer.go index 731830ca40fa8..4623af7261104 100644 --- a/planner/core/optimizer.go +++ b/planner/core/optimizer.go @@ -16,7 +16,6 @@ package core import ( "context" - "fmt" "math" "github.com/pingcap/errors" @@ -651,7 +650,6 @@ func logicalOptimize(ctx context.Context, flag uint64, logic LogicalPlan) (Logic func isLogicalRuleDisabled(r logicalOptRule) bool { disabled := DefaultDisabledLogicalRulesList.Load().(set.StringSet).Exist(r.name()) - logutil.BgLogger().Info(fmt.Sprintf("rule:%v, disabled:%v", r.name(), disabled)) return disabled } diff --git a/planner/core/testdata/integration_suite_in.json b/planner/core/testdata/integration_suite_in.json index 940c67c7f0f47..7d8511cb18fe0 100644 --- a/planner/core/testdata/integration_suite_in.json +++ b/planner/core/testdata/integration_suite_in.json @@ -1011,5 +1011,14 @@ "set @@tidb_opt_range_max_size = 300", "explain format='brief' select /*+ inl_join(t1) */ * from t1 join t2 on t1.a = t2.e where t1.b > t2.f and t1.b < t2.f + 10" ] + }, + { + "name": "TestAvoidDoubleScanForPrefixIndex", + "cases": [ + "select count(1) from t1 where c1 = '0xfff' and c2 is not null", + "select count(1) from t1 where c1 = '0xfff' and c2 is null", + "select count(1) from t1 where c1 >= '0xfff' and c2 is not null", + "select count(1) from t1 where c1 >= '0xfff' and c2 is null" + ] } ] diff --git a/planner/core/testdata/integration_suite_out.json b/planner/core/testdata/integration_suite_out.json index c875a3e728942..f7b9cb60193d3 100644 --- a/planner/core/testdata/integration_suite_out.json +++ b/planner/core/testdata/integration_suite_out.json @@ -7562,5 +7562,59 @@ ] } ] + }, + { + "Name": "TestAvoidDoubleScanForPrefixIndex", + "Cases": [ + { + "SQL": "select count(1) from t1 where c1 = '0xfff' and c2 is not null", + "Plan": [ + "StreamAgg 1.00 root funcs:count(Column#7)->Column#5", + "└─IndexReader 1.00 root index:StreamAgg", + " └─StreamAgg 1.00 cop[tikv] funcs:count(1)->Column#7", + " └─IndexRangeScan 99.90 cop[tikv] table:t1, index:idx2(c1, c2) range:[\"0xfff\" -inf,\"0xfff\" +inf], keep order:false, stats:pseudo" + ], + "Result": [ + "2" + ] + }, + { + "SQL": "select count(1) from t1 where c1 = '0xfff' and c2 is null", + "Plan": [ + "StreamAgg 1.00 root funcs:count(1)->Column#5", + "└─IndexReader 0.10 root index:IndexRangeScan", + " └─IndexRangeScan 0.10 cop[tikv] table:t1, index:idx2(c1, c2) range:[\"0xfff\" NULL,\"0xfff\" NULL], keep order:false, stats:pseudo" + ], + "Result": [ + "1" + ] + }, + { + "SQL": "select count(1) from t1 where c1 >= '0xfff' and c2 is not null", + "Plan": [ + "StreamAgg 1.00 root funcs:count(Column#7)->Column#5", + "└─IndexReader 1.00 root index:StreamAgg", + " └─StreamAgg 1.00 cop[tikv] funcs:count(1)->Column#7", + " └─Selection 3330.00 cop[tikv] not(isnull(test.t1.c2))", + " └─IndexRangeScan 3333.33 cop[tikv] table:t1, index:idx2(c1, c2) range:[\"0xfff\",+inf], keep order:false, stats:pseudo" + ], + "Result": [ + "2" + ] + }, + { + "SQL": "select count(1) from t1 where c1 >= '0xfff' and c2 is null", + "Plan": [ + "StreamAgg 1.00 root funcs:count(Column#7)->Column#5", + "└─IndexReader 1.00 root index:StreamAgg", + " └─StreamAgg 1.00 cop[tikv] funcs:count(1)->Column#7", + " └─Selection 3.33 cop[tikv] isnull(test.t1.c2)", + " └─IndexRangeScan 3333.33 cop[tikv] table:t1, index:idx2(c1, c2) range:[\"0xfff\",+inf], keep order:false, stats:pseudo" + ], + "Result": [ + "1" + ] + } + ] } ] From 98b318f793cb7b68719e8ce59120ff81b720d09f Mon Sep 17 00:00:00 2001 From: xuyifan <675434007@qq.com> Date: Thu, 20 Oct 2022 22:41:44 +0800 Subject: [PATCH 11/21] rename to tidb_opt_prefix_index_single_scan --- executor/set_test.go | 22 +++++++++++----------- planner/core/find_best_task.go | 4 ++-- planner/core/integration_test.go | 2 +- planner/core/logical_plan_trace_test.go | 1 - sessionctx/variable/session.go | 4 ++-- sessionctx/variable/sysvar.go | 4 ++-- sessionctx/variable/tidb_vars.go | 9 ++++----- util/ranger/detacher.go | 10 +++++----- 8 files changed, 27 insertions(+), 29 deletions(-) diff --git a/executor/set_test.go b/executor/set_test.go index 4c617a8dc478c..21e54df040edf 100644 --- a/executor/set_test.go +++ b/executor/set_test.go @@ -830,19 +830,19 @@ func TestSetVar(t *testing.T) { tk.MustExec("set global tidb_auto_analyze_partition_batch_size = 9999") tk.MustQuery("select @@global.tidb_auto_analyze_partition_batch_size").Check(testkit.Rows("1024")) // max value is 1024 - // test variable 'tidb_prefer_prefix_index_single_scan' + // test variable 'tidb_opt_prefix_index_single_scan' // global scope - tk.MustQuery("select @@global.tidb_prefer_prefix_index_single_scan").Check(testkit.Rows("1")) // default value - tk.MustExec("set global tidb_prefer_prefix_index_single_scan = 0") - tk.MustQuery("select @@global.tidb_prefer_prefix_index_single_scan").Check(testkit.Rows("0")) - tk.MustExec("set global tidb_prefer_prefix_index_single_scan = 1") - tk.MustQuery("select @@global.tidb_prefer_prefix_index_single_scan").Check(testkit.Rows("1")) + tk.MustQuery("select @@global.tidb_opt_prefix_index_single_scan").Check(testkit.Rows("1")) // default value + tk.MustExec("set global tidb_opt_prefix_index_single_scan = 0") + tk.MustQuery("select @@global.tidb_opt_prefix_index_single_scan").Check(testkit.Rows("0")) + tk.MustExec("set global tidb_opt_prefix_index_single_scan = 1") + tk.MustQuery("select @@global.tidb_opt_prefix_index_single_scan").Check(testkit.Rows("1")) // session scope - tk.MustQuery("select @@session.tidb_prefer_prefix_index_single_scan").Check(testkit.Rows("1")) // default value - tk.MustExec("set session tidb_prefer_prefix_index_single_scan = 0") - tk.MustQuery("select @@session.tidb_prefer_prefix_index_single_scan").Check(testkit.Rows("0")) - tk.MustExec("set session tidb_prefer_prefix_index_single_scan = 1") - tk.MustQuery("select @@session.tidb_prefer_prefix_index_single_scan").Check(testkit.Rows("1")) + tk.MustQuery("select @@session.tidb_opt_prefix_index_single_scan").Check(testkit.Rows("1")) // default value + tk.MustExec("set session tidb_opt_prefix_index_single_scan = 0") + tk.MustQuery("select @@session.tidb_opt_prefix_index_single_scan").Check(testkit.Rows("0")) + tk.MustExec("set session tidb_opt_prefix_index_single_scan = 1") + tk.MustQuery("select @@session.tidb_opt_prefix_index_single_scan").Check(testkit.Rows("1")) } func TestGetSetNoopVars(t *testing.T) { diff --git a/planner/core/find_best_task.go b/planner/core/find_best_task.go index 36108fc1178be..a6bfdec552ad5 100644 --- a/planner/core/find_best_task.go +++ b/planner/core/find_best_task.go @@ -1348,7 +1348,7 @@ func (ds *DataSource) indexCoveringCondition(condition expression.Expression, in } func (ds *DataSource) isSingleScan(indexColumns []*expression.Column, idxColLens []int) bool { - if !ds.ctx.GetSessionVars().PreferPrefixIndexSingleScan || ds.colsRequiringFullLen == nil { + if !ds.ctx.GetSessionVars().OptPrefixIndexSingleScan || ds.colsRequiringFullLen == nil { // ds.colsRequiringFullLen is set at (*DataSource).PruneColumns. In some cases we don't reach (*DataSource).PruneColumns // and ds.colsRequiringFullLen is nil, so we fall back to ds.indexCoveringColumns(ds.schema.Columns, indexColumns, idxColLens). return ds.indexCoveringColumns(ds.schema.Columns, indexColumns, idxColLens) @@ -1630,7 +1630,7 @@ func (ds *DataSource) splitIndexFilterConditions(conditions []expression.Express var indexConditions, tableConditions []expression.Expression for _, cond := range conditions { var covered bool - if ds.ctx.GetSessionVars().PreferPrefixIndexSingleScan { + if ds.ctx.GetSessionVars().OptPrefixIndexSingleScan { covered = ds.indexCoveringCondition(cond, indexColumns, idxColLens) } else { covered = ds.indexCoveringColumns(expression.ExtractColumns(cond), indexColumns, idxColLens) diff --git a/planner/core/integration_test.go b/planner/core/integration_test.go index ac3e8c6c2e230..403a46c87f848 100644 --- a/planner/core/integration_test.go +++ b/planner/core/integration_test.go @@ -7709,7 +7709,7 @@ func TestAvoidDoubleScanForPrefixIndex(t *testing.T) { KEY idx1 (c1), KEY idx2 (c1,c2(5)) ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_bin`) - tk.MustExec("set tidb_prefer_prefix_index_single_scan = 1") + tk.MustExec("set tidb_opt_prefix_index_single_scan = 1") tk.MustExec("insert into t1 values ('a', '0xfff', '111111'), ('b', '0xfff', '222222'), ('c', '0xfff', null)") var input []string diff --git a/planner/core/logical_plan_trace_test.go b/planner/core/logical_plan_trace_test.go index f28b5814c4098..7233b49cb24e1 100644 --- a/planner/core/logical_plan_trace_test.go +++ b/planner/core/logical_plan_trace_test.go @@ -402,7 +402,6 @@ func TestSingleRuleTraceStep(t *testing.T) { err = Preprocess(context.Background(), s.ctx, stmt, WithPreprocessorReturn(&PreprocessorReturn{InfoSchema: s.is})) require.NoError(t, err, comment) sctx := MockContext() - sctx.GetSessionVars().PreferPrefixIndexSingleScan = false sctx.GetSessionVars().StmtCtx.EnableOptimizeTrace = true sctx.GetSessionVars().AllowAggPushDown = true builder, _ := NewPlanBuilder().Init(sctx, s.is, &hint.BlockHintProcessor{}) diff --git a/sessionctx/variable/session.go b/sessionctx/variable/session.go index da3909e36e507..892a512ac9530 100644 --- a/sessionctx/variable/session.go +++ b/sessionctx/variable/session.go @@ -1276,9 +1276,9 @@ type SessionVars struct { HookContext - // PreferPrefixIndexSingleScan indicates whether to do some optimizations to avoid double scan for prefix index. + // OptPrefixIndexSingleScan indicates whether to do some optimizations to avoid double scan for prefix index. // When set to true, `col is (not) null`(`col` is index prefix column) is regarded as index filter rather than table filter. - PreferPrefixIndexSingleScan bool + OptPrefixIndexSingleScan bool } // GetPreparedStmtByName returns the prepared statement specified by stmtName. diff --git a/sessionctx/variable/sysvar.go b/sessionctx/variable/sysvar.go index 34ecbcf14fd1e..86617a1cf9d99 100644 --- a/sessionctx/variable/sysvar.go +++ b/sessionctx/variable/sysvar.go @@ -1958,8 +1958,8 @@ var defaultSysVars = []*SysVar{ return nil }, }, - {Scope: ScopeGlobal | ScopeSession, Name: TiDBPreferPrefixIndexSingleScan, Value: BoolToOnOff(DefTiDBPreferPrefixIndexSingleScan), Type: TypeBool, SetSession: func(s *SessionVars, val string) error { - s.PreferPrefixIndexSingleScan = TiDBOptOn(val) + {Scope: ScopeGlobal | ScopeSession, Name: TiDBOptPrefixIndexSingleScan, Value: BoolToOnOff(DefTiDBOptPrefixIndexSingleScan), Type: TypeBool, SetSession: func(s *SessionVars, val string) error { + s.OptPrefixIndexSingleScan = TiDBOptOn(val) return nil }}, } diff --git a/sessionctx/variable/tidb_vars.go b/sessionctx/variable/tidb_vars.go index b79e7011b37f3..0d6edb0a46b43 100644 --- a/sessionctx/variable/tidb_vars.go +++ b/sessionctx/variable/tidb_vars.go @@ -753,9 +753,9 @@ const ( // TiDBMergePartitionStatsConcurrency indicates the concurrecny when merge partition stats into global stats TiDBMergePartitionStatsConcurrency = "tidb_merge_partition_stats_concurrency" - // TiDBPreferPrefixIndexSingleScan indicates whether to do some optimizations to avoid double scan for prefix index. + // TiDBOptPrefixIndexSingleScan indicates whether to do some optimizations to avoid double scan for prefix index. // When set to true, `col is (not) null`(`col` is index prefix column) is regarded as index filter rather than table filter. - TiDBPreferPrefixIndexSingleScan = "tidb_prefer_prefix_index_single_scan" + TiDBOptPrefixIndexSingleScan = "tidb_opt_prefix_index_single_scan" ) // TiDB vars that have only global scope @@ -1069,10 +1069,9 @@ const ( DefTiDBMergePartitionStatsConcurrency = 1 DefTiDBServerMemoryLimitGCTrigger = 0.7 DefTiDBEnableGOGCTuner = true - DefTiDBPreferPrefixIndexSingleScan = true - // DefTiDBGOGCTunerThreshold is to limit TiDBGOGCTunerThreshold. - DefTiDBGOGCTunerThreshold float64 = 0.6 + DefTiDBGOGCTunerThreshold float64 = 0.6 + DefTiDBOptPrefixIndexSingleScan = true ) // Process global variables. diff --git a/util/ranger/detacher.go b/util/ranger/detacher.go index efe7da50370cd..30b0bd669a878 100644 --- a/util/ranger/detacher.go +++ b/util/ranger/detacher.go @@ -334,7 +334,7 @@ func (d *rangeDetacher) detachCNFCondAndBuildRangeForIndex(conditions []expressi checker := &conditionChecker{ checkerCol: d.cols[eqOrInCount], length: d.lengths[eqOrInCount], - preferPrefixIndexSingleScan: d.sctx.GetSessionVars().PreferPrefixIndexSingleScan, + preferPrefixIndexSingleScan: d.sctx.GetSessionVars().OptPrefixIndexSingleScan, } if considerDNF { pointRes, offset, columnValues, err := extractIndexPointRangesForCNF(d.sctx, conditions, d.cols, d.lengths, d.rangeMaxSize) @@ -660,7 +660,7 @@ func (d *rangeDetacher) detachDNFCondAndBuildRangeForIndex(condition *expression firstColumnChecker := &conditionChecker{ checkerCol: d.cols[0], length: d.lengths[0], - preferPrefixIndexSingleScan: d.sctx.GetSessionVars().PreferPrefixIndexSingleScan, + preferPrefixIndexSingleScan: d.sctx.GetSessionVars().OptPrefixIndexSingleScan, } rb := builder{sc: d.sctx.GetSessionVars().StmtCtx} dnfItems := expression.FlattenDNFConditions(condition) @@ -925,7 +925,7 @@ func ExtractAccessConditionsForColumn(ctx sessionctx.Context, conds []expression checker := conditionChecker{ checkerCol: col, length: types.UnspecifiedLength, - preferPrefixIndexSingleScan: ctx.GetSessionVars().PreferPrefixIndexSingleScan, + preferPrefixIndexSingleScan: ctx.GetSessionVars().OptPrefixIndexSingleScan, } accessConds := make([]expression.Expression, 0, 8) filter := func(expr expression.Expression) bool { @@ -940,7 +940,7 @@ func DetachCondsForColumn(sctx sessionctx.Context, conds []expression.Expression checker := &conditionChecker{ checkerCol: col, length: types.UnspecifiedLength, - preferPrefixIndexSingleScan: sctx.GetSessionVars().PreferPrefixIndexSingleScan, + preferPrefixIndexSingleScan: sctx.GetSessionVars().OptPrefixIndexSingleScan, } return detachColumnCNFConditions(sctx, conds, checker) } @@ -963,7 +963,7 @@ func MergeDNFItems4Col(ctx sessionctx.Context, dnfItems []expression.Expression) checker := &conditionChecker{ checkerCol: cols[0], length: types.UnspecifiedLength, - preferPrefixIndexSingleScan: ctx.GetSessionVars().PreferPrefixIndexSingleScan, + preferPrefixIndexSingleScan: ctx.GetSessionVars().OptPrefixIndexSingleScan, } // If we can't use this condition to build range, we can't merge it. // Currently, we assume if every condition in a DNF expression can pass this check, then `Selectivity` must be able to From 1a8752b1bcf5f974f4ebe145ba443cc709f27e58 Mon Sep 17 00:00:00 2001 From: xuyifan <675434007@qq.com> Date: Thu, 20 Oct 2022 23:35:31 +0800 Subject: [PATCH 12/21] upd --- planner/core/find_best_task.go | 21 ++++++++------------- 1 file changed, 8 insertions(+), 13 deletions(-) diff --git a/planner/core/find_best_task.go b/planner/core/find_best_task.go index a6bfdec552ad5..aa152789b82d2 100644 --- a/planner/core/find_best_task.go +++ b/planner/core/find_best_task.go @@ -1327,22 +1327,17 @@ func (ds *DataSource) indexCoveringCondition(condition expression.Expression, in case *expression.Column: return ds.indexCoveringColumn(v, indexColumns, idxColLens, false) case *expression.ScalarFunction: - switch v.FuncName.L { - case ast.LogicOr, ast.LogicAnd: - lhsCovered := ds.indexCoveringCondition(v.GetArgs()[0], indexColumns, idxColLens) - rhsCovered := ds.indexCoveringCondition(v.GetArgs()[1], indexColumns, idxColLens) - return lhsCovered && rhsCovered - case ast.UnaryNot: - return ds.indexCoveringCondition(v.GetArgs()[0], indexColumns, idxColLens) - case ast.IsNull: - col, ok := v.GetArgs()[0].(*expression.Column) - if !ok { + if v.FuncName.L == ast.IsNull { + if col, ok := v.GetArgs()[0].(*expression.Column); ok { + return ds.indexCoveringColumn(col, indexColumns, idxColLens, true) + } + } + for _, arg := range v.GetArgs() { + if !ds.indexCoveringCondition(arg, indexColumns, idxColLens) { return false } - return ds.indexCoveringColumn(col, indexColumns, idxColLens, true) - default: - return ds.indexCoveringColumns(expression.ExtractColumns(v), indexColumns, idxColLens) } + return true } return true } From eb07c7dbf72762b3b7497488f265fda35347dad2 Mon Sep 17 00:00:00 2001 From: xuyifan <675434007@qq.com> Date: Fri, 21 Oct 2022 00:02:36 +0800 Subject: [PATCH 13/21] add test --- planner/core/integration_test.go | 2 +- .../core/testdata/integration_suite_in.json | 6 +- .../core/testdata/integration_suite_out.json | 58 ++++++++- util/ranger/ranger_test.go | 115 ++++++++++++++++++ 4 files changed, 177 insertions(+), 4 deletions(-) diff --git a/planner/core/integration_test.go b/planner/core/integration_test.go index 403a46c87f848..91e3c726ce328 100644 --- a/planner/core/integration_test.go +++ b/planner/core/integration_test.go @@ -7710,7 +7710,7 @@ func TestAvoidDoubleScanForPrefixIndex(t *testing.T) { KEY idx2 (c1,c2(5)) ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_bin`) tk.MustExec("set tidb_opt_prefix_index_single_scan = 1") - tk.MustExec("insert into t1 values ('a', '0xfff', '111111'), ('b', '0xfff', '222222'), ('c', '0xfff', null)") + tk.MustExec("insert into t1 values ('a', '0xfff', '111111'), ('b', '0xfff', '222222'), ('c', '0xfff', ''), ('d', '0xfff', null)") var input []string var output []struct { diff --git a/planner/core/testdata/integration_suite_in.json b/planner/core/testdata/integration_suite_in.json index 7d8511cb18fe0..6fe9a75650a37 100644 --- a/planner/core/testdata/integration_suite_in.json +++ b/planner/core/testdata/integration_suite_in.json @@ -1018,7 +1018,11 @@ "select count(1) from t1 where c1 = '0xfff' and c2 is not null", "select count(1) from t1 where c1 = '0xfff' and c2 is null", "select count(1) from t1 where c1 >= '0xfff' and c2 is not null", - "select count(1) from t1 where c1 >= '0xfff' and c2 is null" + "select count(1) from t1 where c1 >= '0xfff' and c2 is null", + "select c2 from t1 use index(idx2) where c1 = '0xfff' and c2 is not null", + "select c2 from t1 use index(idx2) where c1 = '0xfff' and c2 is null", + "select c2 from t1 use index(idx2) where c1 >= '0xfff' and c2 is not null", + "select c2 from t1 use index(idx2) where c1 >= '0xfff' and c2 is null" ] } ] diff --git a/planner/core/testdata/integration_suite_out.json b/planner/core/testdata/integration_suite_out.json index f7b9cb60193d3..ea3370b9d0999 100644 --- a/planner/core/testdata/integration_suite_out.json +++ b/planner/core/testdata/integration_suite_out.json @@ -7575,7 +7575,7 @@ " └─IndexRangeScan 99.90 cop[tikv] table:t1, index:idx2(c1, c2) range:[\"0xfff\" -inf,\"0xfff\" +inf], keep order:false, stats:pseudo" ], "Result": [ - "2" + "3" ] }, { @@ -7599,7 +7599,7 @@ " └─IndexRangeScan 3333.33 cop[tikv] table:t1, index:idx2(c1, c2) range:[\"0xfff\",+inf], keep order:false, stats:pseudo" ], "Result": [ - "2" + "3" ] }, { @@ -7614,6 +7614,60 @@ "Result": [ "1" ] + }, + { + "SQL": "select c2 from t1 use index(idx2) where c1 = '0xfff' and c2 is not null", + "Plan": [ + "Projection 99.90 root test.t1.c2", + "└─IndexLookUp 99.90 root ", + " ├─IndexRangeScan(Build) 99.90 cop[tikv] table:t1, index:idx2(c1, c2) range:[\"0xfff\" -inf,\"0xfff\" +inf], keep order:false, stats:pseudo", + " └─TableRowIDScan(Probe) 99.90 cop[tikv] table:t1 keep order:false, stats:pseudo" + ], + "Result": [ + "111111", + "222222", + "" + ] + }, + { + "SQL": "select c2 from t1 use index(idx2) where c1 = '0xfff' and c2 is null", + "Plan": [ + "Projection 0.10 root test.t1.c2", + "└─IndexLookUp 0.10 root ", + " ├─IndexRangeScan(Build) 0.10 cop[tikv] table:t1, index:idx2(c1, c2) range:[\"0xfff\" NULL,\"0xfff\" NULL], keep order:false, stats:pseudo", + " └─TableRowIDScan(Probe) 0.10 cop[tikv] table:t1 keep order:false, stats:pseudo" + ], + "Result": [ + "" + ] + }, + { + "SQL": "select c2 from t1 use index(idx2) where c1 >= '0xfff' and c2 is not null", + "Plan": [ + "Projection 3330.00 root test.t1.c2", + "└─IndexLookUp 3330.00 root ", + " ├─Selection(Build) 3330.00 cop[tikv] not(isnull(test.t1.c2))", + " │ └─IndexRangeScan 3333.33 cop[tikv] table:t1, index:idx2(c1, c2) range:[\"0xfff\",+inf], keep order:false, stats:pseudo", + " └─TableRowIDScan(Probe) 3330.00 cop[tikv] table:t1 keep order:false, stats:pseudo" + ], + "Result": [ + "111111", + "222222", + "" + ] + }, + { + "SQL": "select c2 from t1 use index(idx2) where c1 >= '0xfff' and c2 is null", + "Plan": [ + "Projection 3.33 root test.t1.c2", + "└─IndexLookUp 3.33 root ", + " ├─Selection(Build) 3.33 cop[tikv] isnull(test.t1.c2)", + " │ └─IndexRangeScan 3333.33 cop[tikv] table:t1, index:idx2(c1, c2) range:[\"0xfff\",+inf], keep order:false, stats:pseudo", + " └─TableRowIDScan(Probe) 3.33 cop[tikv] table:t1 keep order:false, stats:pseudo" + ], + "Result": [ + "" + ] } ] } diff --git a/util/ranger/ranger_test.go b/util/ranger/ranger_test.go index ac767c7f54f38..14f9c6ce95a53 100644 --- a/util/ranger/ranger_test.go +++ b/util/ranger/ranger_test.go @@ -17,6 +17,7 @@ package ranger_test import ( "context" "fmt" + "github.com/pingcap/tidb/util/collate" "testing" "github.com/pingcap/tidb/config" @@ -2447,3 +2448,117 @@ func TestRangeFallbackForBuildColumnRange(t *testing.T) { require.Equal(t, "[]", fmt.Sprintf("%v", access)) require.Equal(t, "[in(test.t.b, 10, 20, 30)]", fmt.Sprintf("%v", remained)) } + +func TestPrefixIndexRange(t *testing.T) { + store := testkit.CreateMockStore(t) + tk := testkit.NewTestKit(t, store) + tk.MustExec("use test") + tk.MustExec("drop table if exists t") + tk.MustExec(` +create table t( + a varchar(50), + b varchar(50), + c text(50), + d varbinary(50), + index idx_a(a(2)), + index idx_ab(a(2), b(2)), + index idx_c(c(2)), + index idx_d(d(2)) +)`) + + tests := []struct { + indexPos int + exprStr string + accessConds string + filterConds string + resultStr string + }{ + { + indexPos: 0, + exprStr: "a is null", + accessConds: "[isnull(test.t.a)]", + filterConds: "[]", + resultStr: "[[NULL,NULL]]", + }, + { + indexPos: 0, + exprStr: "a is not null", + accessConds: "[not(isnull(test.t.a))]", + filterConds: "[]", + resultStr: "[[-inf,+inf]]", + }, + { + indexPos: 1, + exprStr: "a = 'a' and b is null", + accessConds: "[eq(test.t.a, a) isnull(test.t.b)]", + filterConds: "[eq(test.t.a, a)]", + resultStr: "[[\"a\" NULL,\"a\" NULL]]", + }, + { + indexPos: 1, + exprStr: "a = 'a' and b is not null", + accessConds: "[eq(test.t.a, a) not(isnull(test.t.b))]", + filterConds: "[eq(test.t.a, a)]", + resultStr: "[[\"a\" -inf,\"a\" +inf]]", + }, + { + indexPos: 2, + exprStr: "c is null", + accessConds: "[isnull(test.t.c)]", + filterConds: "[]", + resultStr: "[[NULL,NULL]]", + }, + { + indexPos: 2, + exprStr: "c is not null", + accessConds: "[not(isnull(test.t.c))]", + filterConds: "[]", + resultStr: "[[-inf,+inf]]", + }, + { + indexPos: 3, + exprStr: "d is null", + accessConds: "[isnull(test.t.d)]", + filterConds: "[]", + resultStr: "[[NULL,NULL]]", + }, + { + indexPos: 3, + exprStr: "d is not null", + accessConds: "[not(isnull(test.t.d))]", + filterConds: "[]", + resultStr: "[[-inf,+inf]]", + }, + } + + collate.SetNewCollationEnabledForTest(true) + defer func() { collate.SetNewCollationEnabledForTest(false) }() + ctx := context.Background() + for _, tt := range tests { + sql := "select * from t where " + tt.exprStr + sctx := tk.Session() + stmts, err := session.Parse(sctx, sql) + require.NoError(t, err, fmt.Sprintf("error %v, for expr %s", err, tt.exprStr)) + require.Len(t, stmts, 1) + ret := &plannercore.PreprocessorReturn{} + err = plannercore.Preprocess(context.Background(), sctx, stmts[0], plannercore.WithPreprocessorReturn(ret)) + require.NoError(t, err, fmt.Sprintf("error %v, for resolve name, expr %s", err, tt.exprStr)) + p, _, err := plannercore.BuildLogicalPlanForTest(ctx, sctx, stmts[0], ret.InfoSchema) + require.NoError(t, err, fmt.Sprintf("error %v, for build plan, expr %s", err, tt.exprStr)) + selection := p.(plannercore.LogicalPlan).Children()[0].(*plannercore.LogicalSelection) + tbl := selection.Children()[0].(*plannercore.DataSource).TableInfo() + require.NotNil(t, selection, fmt.Sprintf("expr:%v", tt.exprStr)) + conds := make([]expression.Expression, len(selection.Conditions)) + for i, cond := range selection.Conditions { + conds[i] = expression.PushDownNot(sctx, cond) + } + cols, lengths := expression.IndexInfo2PrefixCols(tbl.Columns, selection.Schema().Columns, tbl.Indices[tt.indexPos]) + require.NotNil(t, cols) + res, err := ranger.DetachCondAndBuildRangeForIndex(sctx, conds, cols, lengths, 0) + require.NoError(t, err) + require.Equal(t, tt.accessConds, fmt.Sprintf("%s", res.AccessConds), fmt.Sprintf("wrong access conditions for expr: %s", tt.exprStr)) + require.Equal(t, tt.filterConds, fmt.Sprintf("%s", res.RemainedConds), fmt.Sprintf("wrong filter conditions for expr: %s", tt.exprStr)) + got := fmt.Sprintf("%v", res.Ranges) + require.Equal(t, tt.resultStr, got, fmt.Sprintf("different for expr %s", tt.exprStr)) + } +} From 6a36660ffff2a9e92f8444b5f85f2a913a440edc Mon Sep 17 00:00:00 2001 From: xuyifan <675434007@qq.com> Date: Fri, 21 Oct 2022 00:08:46 +0800 Subject: [PATCH 14/21] upd --- util/ranger/ranger_test.go | 1 + 1 file changed, 1 insertion(+) diff --git a/util/ranger/ranger_test.go b/util/ranger/ranger_test.go index 14f9c6ce95a53..b414fe6cdcd34 100644 --- a/util/ranger/ranger_test.go +++ b/util/ranger/ranger_test.go @@ -2465,6 +2465,7 @@ create table t( index idx_c(c(2)), index idx_d(d(2)) )`) + tk.MustExec("set tidb_opt_prefix_index_single_scan = 1") tests := []struct { indexPos int From b61ee9da95acc08604fe4fa54843e857329b59f6 Mon Sep 17 00:00:00 2001 From: xuyifan <675434007@qq.com> Date: Fri, 21 Oct 2022 10:17:20 +0800 Subject: [PATCH 15/21] fixup --- util/ranger/checker.go | 8 ++++---- util/ranger/detacher.go | 30 +++++++++++++++--------------- util/ranger/ranger_test.go | 2 +- 3 files changed, 20 insertions(+), 20 deletions(-) diff --git a/util/ranger/checker.go b/util/ranger/checker.go index 9781ced9a992d..2e3cf4d26d725 100644 --- a/util/ranger/checker.go +++ b/util/ranger/checker.go @@ -24,9 +24,9 @@ import ( // conditionChecker checks if this condition can be pushed to index planner. type conditionChecker struct { - checkerCol *expression.Column - length int - preferPrefixIndexSingleScan bool + checkerCol *expression.Column + length int + optPrefixIndexSingleScan bool } func (c *conditionChecker) isFullLengthColumn() bool { @@ -94,7 +94,7 @@ func (c *conditionChecker) checkScalarFunction(scalar *expression.ScalarFunction case ast.IsNull: if c.checkColumn(scalar.GetArgs()[0]) { var isNullReserve bool // We can know whether the column is null from prefix column of any length. - if !c.preferPrefixIndexSingleScan { + if !c.optPrefixIndexSingleScan { isNullReserve = !c.isFullLengthColumn() } return true, isNullReserve diff --git a/util/ranger/detacher.go b/util/ranger/detacher.go index 30b0bd669a878..cbeed5b6364de 100644 --- a/util/ranger/detacher.go +++ b/util/ranger/detacher.go @@ -332,9 +332,9 @@ func (d *rangeDetacher) detachCNFCondAndBuildRangeForIndex(conditions []expressi return res, nil } checker := &conditionChecker{ - checkerCol: d.cols[eqOrInCount], - length: d.lengths[eqOrInCount], - preferPrefixIndexSingleScan: d.sctx.GetSessionVars().OptPrefixIndexSingleScan, + checkerCol: d.cols[eqOrInCount], + length: d.lengths[eqOrInCount], + optPrefixIndexSingleScan: d.sctx.GetSessionVars().OptPrefixIndexSingleScan, } if considerDNF { pointRes, offset, columnValues, err := extractIndexPointRangesForCNF(d.sctx, conditions, d.cols, d.lengths, d.rangeMaxSize) @@ -658,9 +658,9 @@ func ExtractEqAndInCondition(sctx sessionctx.Context, conditions []expression.Ex // We will detach the conditions of every DNF items, then compose them to a DNF. func (d *rangeDetacher) detachDNFCondAndBuildRangeForIndex(condition *expression.ScalarFunction, newTpSlice []*types.FieldType) (Ranges, []expression.Expression, []*valueInfo, bool, error) { firstColumnChecker := &conditionChecker{ - checkerCol: d.cols[0], - length: d.lengths[0], - preferPrefixIndexSingleScan: d.sctx.GetSessionVars().OptPrefixIndexSingleScan, + checkerCol: d.cols[0], + length: d.lengths[0], + optPrefixIndexSingleScan: d.sctx.GetSessionVars().OptPrefixIndexSingleScan, } rb := builder{sc: d.sctx.GetSessionVars().StmtCtx} dnfItems := expression.FlattenDNFConditions(condition) @@ -923,9 +923,9 @@ func AppendConditionsIfNotExist(conditions, condsToAppend []expression.Expressio // we don't need to return the remained filter conditions, it is much simpler than DetachCondsForColumn. func ExtractAccessConditionsForColumn(ctx sessionctx.Context, conds []expression.Expression, col *expression.Column) []expression.Expression { checker := conditionChecker{ - checkerCol: col, - length: types.UnspecifiedLength, - preferPrefixIndexSingleScan: ctx.GetSessionVars().OptPrefixIndexSingleScan, + checkerCol: col, + length: types.UnspecifiedLength, + optPrefixIndexSingleScan: ctx.GetSessionVars().OptPrefixIndexSingleScan, } accessConds := make([]expression.Expression, 0, 8) filter := func(expr expression.Expression) bool { @@ -938,9 +938,9 @@ func ExtractAccessConditionsForColumn(ctx sessionctx.Context, conds []expression // DetachCondsForColumn detaches access conditions for specified column from other filter conditions. func DetachCondsForColumn(sctx sessionctx.Context, conds []expression.Expression, col *expression.Column) (accessConditions, otherConditions []expression.Expression) { checker := &conditionChecker{ - checkerCol: col, - length: types.UnspecifiedLength, - preferPrefixIndexSingleScan: sctx.GetSessionVars().OptPrefixIndexSingleScan, + checkerCol: col, + length: types.UnspecifiedLength, + optPrefixIndexSingleScan: sctx.GetSessionVars().OptPrefixIndexSingleScan, } return detachColumnCNFConditions(sctx, conds, checker) } @@ -961,9 +961,9 @@ func MergeDNFItems4Col(ctx sessionctx.Context, dnfItems []expression.Expression) uniqueID := cols[0].UniqueID checker := &conditionChecker{ - checkerCol: cols[0], - length: types.UnspecifiedLength, - preferPrefixIndexSingleScan: ctx.GetSessionVars().OptPrefixIndexSingleScan, + checkerCol: cols[0], + length: types.UnspecifiedLength, + optPrefixIndexSingleScan: ctx.GetSessionVars().OptPrefixIndexSingleScan, } // If we can't use this condition to build range, we can't merge it. // Currently, we assume if every condition in a DNF expression can pass this check, then `Selectivity` must be able to diff --git a/util/ranger/ranger_test.go b/util/ranger/ranger_test.go index b414fe6cdcd34..230252bc8489d 100644 --- a/util/ranger/ranger_test.go +++ b/util/ranger/ranger_test.go @@ -17,7 +17,6 @@ package ranger_test import ( "context" "fmt" - "github.com/pingcap/tidb/util/collate" "testing" "github.com/pingcap/tidb/config" @@ -32,6 +31,7 @@ import ( "github.com/pingcap/tidb/testkit" "github.com/pingcap/tidb/testkit/testdata" "github.com/pingcap/tidb/types" + "github.com/pingcap/tidb/util/collate" "github.com/pingcap/tidb/util/ranger" "github.com/stretchr/testify/require" ) From d82bcaafb85d35bdaa8087261f835bf13a90e024 Mon Sep 17 00:00:00 2001 From: xuyifan <675434007@qq.com> Date: Fri, 21 Oct 2022 15:09:38 +0800 Subject: [PATCH 16/21] add test --- planner/core/integration_test.go | 8 +++-- .../core/testdata/integration_suite_in.json | 6 ++-- .../core/testdata/integration_suite_out.json | 34 ++++++++++++++++--- 3 files changed, 38 insertions(+), 10 deletions(-) diff --git a/planner/core/integration_test.go b/planner/core/integration_test.go index 91e3c726ce328..4eae4807aec17 100644 --- a/planner/core/integration_test.go +++ b/planner/core/integration_test.go @@ -7698,7 +7698,7 @@ func TestOuterJoinEliminationForIssue18216(t *testing.T) { tk.MustQuery("select group_concat(c order by (select group_concat(c order by c) from t2 where a=t1.a), c desc) from t1;").Check(testkit.Rows("2,1,4,3")) } -func TestAvoidDoubleScanForPrefixIndex(t *testing.T) { +func TestNullConditionForPrefixIndex(t *testing.T) { store := testkit.CreateMockStore(t) tk := testkit.NewTestKit(t, store) tk.MustExec("use test") @@ -7709,8 +7709,10 @@ func TestAvoidDoubleScanForPrefixIndex(t *testing.T) { KEY idx1 (c1), KEY idx2 (c1,c2(5)) ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_bin`) + tk.MustExec("create table t2(a int, b varchar(10), index idx(b(5)))") tk.MustExec("set tidb_opt_prefix_index_single_scan = 1") tk.MustExec("insert into t1 values ('a', '0xfff', '111111'), ('b', '0xfff', '222222'), ('c', '0xfff', ''), ('d', '0xfff', null)") + tk.MustExec("insert into t2 values (1, 'aaaaaa'), (2, 'bbb'), (3, ''), (4, null)") var input []string var output []struct { @@ -7724,9 +7726,9 @@ func TestAvoidDoubleScanForPrefixIndex(t *testing.T) { testdata.OnRecord(func() { output[i].SQL = tt output[i].Plan = testdata.ConvertRowsToStrings(tk.MustQuery("explain format='brief' " + tt).Rows()) - output[i].Result = testdata.ConvertRowsToStrings(tk.MustQuery(tt).Rows()) + output[i].Result = testdata.ConvertRowsToStrings(tk.MustQuery(tt).Sort().Rows()) }) tk.MustQuery("explain format='brief' " + tt).Check(testkit.Rows(output[i].Plan...)) - tk.MustQuery(tt).Check(testkit.Rows(output[i].Result...)) + tk.MustQuery(tt).Sort().Check(testkit.Rows(output[i].Result...)) } } diff --git a/planner/core/testdata/integration_suite_in.json b/planner/core/testdata/integration_suite_in.json index 6fe9a75650a37..79f8074e791cb 100644 --- a/planner/core/testdata/integration_suite_in.json +++ b/planner/core/testdata/integration_suite_in.json @@ -1013,7 +1013,7 @@ ] }, { - "name": "TestAvoidDoubleScanForPrefixIndex", + "name": "TestNullConditionForPrefixIndex", "cases": [ "select count(1) from t1 where c1 = '0xfff' and c2 is not null", "select count(1) from t1 where c1 = '0xfff' and c2 is null", @@ -1022,7 +1022,9 @@ "select c2 from t1 use index(idx2) where c1 = '0xfff' and c2 is not null", "select c2 from t1 use index(idx2) where c1 = '0xfff' and c2 is null", "select c2 from t1 use index(idx2) where c1 >= '0xfff' and c2 is not null", - "select c2 from t1 use index(idx2) where c1 >= '0xfff' and c2 is null" + "select c2 from t1 use index(idx2) where c1 >= '0xfff' and c2 is null", + "select b from t2 where b is not null", + "select b from t2 where b is null" ] } ] diff --git a/planner/core/testdata/integration_suite_out.json b/planner/core/testdata/integration_suite_out.json index ea3370b9d0999..144b7726c5ae0 100644 --- a/planner/core/testdata/integration_suite_out.json +++ b/planner/core/testdata/integration_suite_out.json @@ -7564,7 +7564,7 @@ ] }, { - "Name": "TestAvoidDoubleScanForPrefixIndex", + "Name": "TestNullConditionForPrefixIndex", "Cases": [ { "SQL": "select count(1) from t1 where c1 = '0xfff' and c2 is not null", @@ -7624,9 +7624,9 @@ " └─TableRowIDScan(Probe) 99.90 cop[tikv] table:t1 keep order:false, stats:pseudo" ], "Result": [ + "", "111111", - "222222", - "" + "222222" ] }, { @@ -7651,9 +7651,9 @@ " └─TableRowIDScan(Probe) 3330.00 cop[tikv] table:t1 keep order:false, stats:pseudo" ], "Result": [ + "", "111111", - "222222", - "" + "222222" ] }, { @@ -7668,6 +7668,30 @@ "Result": [ "" ] + }, + { + "SQL": "select b from t2 where b is not null", + "Plan": [ + "TableReader 9990.00 root data:Selection", + "└─Selection 9990.00 cop[tikv] not(isnull(test.t2.b))", + " └─TableFullScan 10000.00 cop[tikv] table:t2 keep order:false, stats:pseudo" + ], + "Result": [ + "", + "aaaaaa", + "bbb" + ] + }, + { + "SQL": "select b from t2 where b is null", + "Plan": [ + "IndexLookUp 10.00 root ", + "├─IndexRangeScan(Build) 10.00 cop[tikv] table:t2, index:idx(b) range:[NULL,NULL], keep order:false, stats:pseudo", + "└─TableRowIDScan(Probe) 10.00 cop[tikv] table:t2 keep order:false, stats:pseudo" + ], + "Result": [ + "" + ] } ] } From ad5b5c75ade11c02cdd2c353dc3bc90d826a64e9 Mon Sep 17 00:00:00 2001 From: xuyifan <675434007@qq.com> Date: Mon, 24 Oct 2022 11:29:35 +0800 Subject: [PATCH 17/21] address comment --- planner/core/find_best_task.go | 10 ++++++---- util/ranger/checker.go | 29 +++++++++++++++-------------- 2 files changed, 21 insertions(+), 18 deletions(-) diff --git a/planner/core/find_best_task.go b/planner/core/find_best_task.go index aa152789b82d2..8105b896deee1 100644 --- a/planner/core/find_best_task.go +++ b/planner/core/find_best_task.go @@ -1283,10 +1283,11 @@ func extractFiltersForIndexMerge(sc *stmtctx.StatementContext, client kv.Client, func indexColsCoveringCol(col *expression.Column, indexCols []*expression.Column, idxColLens []int, ignoreLen bool) bool { for i, indexCol := range indexCols { - if indexCol != nil && col.EqualByExprAndID(nil, indexCol) { - if ignoreLen || idxColLens[i] == types.UnspecifiedLength || idxColLens[i] == col.RetType.GetFlen() { - return true - } + if indexCol == nil || !col.EqualByExprAndID(nil, indexCol) { + continue + } + if ignoreLen || idxColLens[i] == types.UnspecifiedLength || idxColLens[i] == col.RetType.GetFlen() { + return true } } return false @@ -1327,6 +1328,7 @@ func (ds *DataSource) indexCoveringCondition(condition expression.Expression, in case *expression.Column: return ds.indexCoveringColumn(v, indexColumns, idxColLens, false) case *expression.ScalarFunction: + // Even if the index only contains prefix `col`, the index can cover `col is null`. if v.FuncName.L == ast.IsNull { if col, ok := v.GetArgs()[0].(*expression.Column); ok { return ds.indexCoveringColumn(col, indexColumns, idxColLens, true) diff --git a/util/ranger/checker.go b/util/ranger/checker.go index 2e3cf4d26d725..0468d3c92472e 100644 --- a/util/ranger/checker.go +++ b/util/ranger/checker.go @@ -44,10 +44,7 @@ func (c *conditionChecker) check(condition expression.Expression) (isAccessCond, if x.RetType.EvalType() == types.ETString { return false, true } - if c.checkColumn(x) { - return true, !c.isFullLengthColumn() - } - return false, true + return c.checkColumn(x) case *expression.Constant: return true, false } @@ -66,7 +63,7 @@ func (c *conditionChecker) checkScalarFunction(scalar *expression.ScalarFunction return false, true case ast.EQ, ast.NE, ast.GE, ast.GT, ast.LE, ast.LT, ast.NullEQ: if _, ok := scalar.GetArgs()[0].(*expression.Constant); ok { - if c.checkColumn(scalar.GetArgs()[1]) { + if c.matchColumn(scalar.GetArgs()[1]) { // Checks whether the scalar function is calculated use the collation compatible with the column. if scalar.GetArgs()[1].GetType().EvalType() == types.ETString && !collate.CompatibleCollate(scalar.GetArgs()[1].GetType().GetCollate(), collation) { return false, true @@ -79,7 +76,7 @@ func (c *conditionChecker) checkScalarFunction(scalar *expression.ScalarFunction } } if _, ok := scalar.GetArgs()[1].(*expression.Constant); ok { - if c.checkColumn(scalar.GetArgs()[0]) { + if c.matchColumn(scalar.GetArgs()[0]) { // Checks whether the scalar function is calculated use the collation compatible with the column. if scalar.GetArgs()[0].GetType().EvalType() == types.ETString && !collate.CompatibleCollate(scalar.GetArgs()[0].GetType().GetCollate(), collation) { return false, true @@ -92,7 +89,7 @@ func (c *conditionChecker) checkScalarFunction(scalar *expression.ScalarFunction } } case ast.IsNull: - if c.checkColumn(scalar.GetArgs()[0]) { + if c.matchColumn(scalar.GetArgs()[0]) { var isNullReserve bool // We can know whether the column is null from prefix column of any length. if !c.optPrefixIndexSingleScan { isNullReserve = !c.isFullLengthColumn() @@ -106,10 +103,7 @@ func (c *conditionChecker) checkScalarFunction(scalar *expression.ScalarFunction return false, true } } - if c.checkColumn(scalar.GetArgs()[0]) { - return true, !c.isFullLengthColumn() - } - return false, true + return c.checkColumn(scalar.GetArgs()[0]) case ast.UnaryNot: // TODO: support "not like" convert to access conditions. s, ok := scalar.GetArgs()[0].(*expression.ScalarFunction) @@ -122,7 +116,7 @@ func (c *conditionChecker) checkScalarFunction(scalar *expression.ScalarFunction } return c.check(scalar.GetArgs()[0]) case ast.In: - if !c.checkColumn(scalar.GetArgs()[0]) { + if !c.matchColumn(scalar.GetArgs()[0]) { return false, true } if scalar.GetArgs()[0].GetType().EvalType() == types.ETString && !collate.CompatibleCollate(scalar.GetArgs()[0].GetType().GetCollate(), collation) { @@ -158,7 +152,7 @@ func (c *conditionChecker) checkLikeFunc(scalar *expression.ScalarFunction) (isA if !collate.CompatibleCollate(scalar.GetArgs()[0].GetType().GetCollate(), collation) { return false, true } - if !c.checkColumn(scalar.GetArgs()[0]) { + if !c.matchColumn(scalar.GetArgs()[0]) { return false, true } pattern, ok := scalar.GetArgs()[1].(*expression.Constant) @@ -212,10 +206,17 @@ func (c *conditionChecker) checkLikeFunc(scalar *expression.ScalarFunction) (isA return true, likeFuncReserve } -func (c *conditionChecker) checkColumn(expr expression.Expression) bool { +func (c *conditionChecker) matchColumn(expr expression.Expression) bool { // Check if virtual expression column matched if c.checkerCol != nil { return c.checkerCol.EqualByExprAndID(nil, expr) } return false } + +func (c *conditionChecker) checkColumn(expr expression.Expression) (isAccessCond, shouldReserve bool) { + if c.matchColumn(expr) { + return true, !c.isFullLengthColumn() + } + return false, true +} From 25ce3a4c8a20349f635c129adc8176ce8c4089ab Mon Sep 17 00:00:00 2001 From: xuyifan <675434007@qq.com> Date: Mon, 24 Oct 2022 16:07:24 +0800 Subject: [PATCH 18/21] address comment --- planner/core/find_best_task.go | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/planner/core/find_best_task.go b/planner/core/find_best_task.go index 8105b896deee1..6f99593514459 100644 --- a/planner/core/find_best_task.go +++ b/planner/core/find_best_task.go @@ -1118,7 +1118,7 @@ func (ds *DataSource) convertToIndexMergeScan(prop *property.PhysicalProperty, c func (ds *DataSource) convertToPartialIndexScan(prop *property.PhysicalProperty, path *util.AccessPath) (indexPlan PhysicalPlan) { is := ds.getOriginalPhysicalIndexScan(prop, path, false, false) - // TODO: Consider using indexCoveringColumns() to avoid another TableRead + // TODO: Consider using isIndexCoveringColumns() to avoid another TableRead indexConds := path.IndexFilters if indexConds != nil { var selectivity float64 @@ -1281,7 +1281,7 @@ func extractFiltersForIndexMerge(sc *stmtctx.StatementContext, client kv.Client, return } -func indexColsCoveringCol(col *expression.Column, indexCols []*expression.Column, idxColLens []int, ignoreLen bool) bool { +func isIndexColsCoveringCol(col *expression.Column, indexCols []*expression.Column, idxColLens []int, ignoreLen bool) bool { for i, indexCol := range indexCols { if indexCol == nil || !col.EqualByExprAndID(nil, indexCol) { continue @@ -1300,8 +1300,8 @@ func (ds *DataSource) indexCoveringColumn(column *expression.Column, indexColumn if column.ID == model.ExtraHandleID { return true } - coveredByPlainIndex := indexColsCoveringCol(column, indexColumns, idxColLens, ignoreLen) - coveredByClusteredIndex := indexColsCoveringCol(column, ds.commonHandleCols, ds.commonHandleLens, ignoreLen) + coveredByPlainIndex := isIndexColsCoveringCol(column, indexColumns, idxColLens, ignoreLen) + coveredByClusteredIndex := isIndexColsCoveringCol(column, ds.commonHandleCols, ds.commonHandleLens, ignoreLen) if !coveredByPlainIndex && !coveredByClusteredIndex { return false } @@ -1314,7 +1314,7 @@ func (ds *DataSource) indexCoveringColumn(column *expression.Column, indexColumn return true } -func (ds *DataSource) indexCoveringColumns(columns, indexColumns []*expression.Column, idxColLens []int) bool { +func (ds *DataSource) isIndexCoveringColumns(columns, indexColumns []*expression.Column, idxColLens []int) bool { for _, col := range columns { if !ds.indexCoveringColumn(col, indexColumns, idxColLens, false) { return false @@ -1323,7 +1323,7 @@ func (ds *DataSource) indexCoveringColumns(columns, indexColumns []*expression.C return true } -func (ds *DataSource) indexCoveringCondition(condition expression.Expression, indexColumns []*expression.Column, idxColLens []int) bool { +func (ds *DataSource) isIndexCoveringCondition(condition expression.Expression, indexColumns []*expression.Column, idxColLens []int) bool { switch v := condition.(type) { case *expression.Column: return ds.indexCoveringColumn(v, indexColumns, idxColLens, false) @@ -1335,7 +1335,7 @@ func (ds *DataSource) indexCoveringCondition(condition expression.Expression, in } } for _, arg := range v.GetArgs() { - if !ds.indexCoveringCondition(arg, indexColumns, idxColLens) { + if !ds.isIndexCoveringCondition(arg, indexColumns, idxColLens) { return false } } @@ -1347,14 +1347,14 @@ func (ds *DataSource) indexCoveringCondition(condition expression.Expression, in func (ds *DataSource) isSingleScan(indexColumns []*expression.Column, idxColLens []int) bool { if !ds.ctx.GetSessionVars().OptPrefixIndexSingleScan || ds.colsRequiringFullLen == nil { // ds.colsRequiringFullLen is set at (*DataSource).PruneColumns. In some cases we don't reach (*DataSource).PruneColumns - // and ds.colsRequiringFullLen is nil, so we fall back to ds.indexCoveringColumns(ds.schema.Columns, indexColumns, idxColLens). - return ds.indexCoveringColumns(ds.schema.Columns, indexColumns, idxColLens) + // and ds.colsRequiringFullLen is nil, so we fall back to ds.isIndexCoveringColumns(ds.schema.Columns, indexColumns, idxColLens). + return ds.isIndexCoveringColumns(ds.schema.Columns, indexColumns, idxColLens) } - if !ds.indexCoveringColumns(ds.colsRequiringFullLen, indexColumns, idxColLens) { + if !ds.isIndexCoveringColumns(ds.colsRequiringFullLen, indexColumns, idxColLens) { return false } for _, cond := range ds.allConds { - if !ds.indexCoveringCondition(cond, indexColumns, idxColLens) { + if !ds.isIndexCoveringCondition(cond, indexColumns, idxColLens) { return false } } @@ -1628,9 +1628,9 @@ func (ds *DataSource) splitIndexFilterConditions(conditions []expression.Express for _, cond := range conditions { var covered bool if ds.ctx.GetSessionVars().OptPrefixIndexSingleScan { - covered = ds.indexCoveringCondition(cond, indexColumns, idxColLens) + covered = ds.isIndexCoveringCondition(cond, indexColumns, idxColLens) } else { - covered = ds.indexCoveringColumns(expression.ExtractColumns(cond), indexColumns, idxColLens) + covered = ds.isIndexCoveringColumns(expression.ExtractColumns(cond), indexColumns, idxColLens) } if covered { indexConditions = append(indexConditions, cond) From 626a39c351e47ee04db1a79e475983e5c72bc97c Mon Sep 17 00:00:00 2001 From: xuyifan <675434007@qq.com> Date: Mon, 24 Oct 2022 17:45:00 +0800 Subject: [PATCH 19/21] refine test --- planner/core/testdata/integration_suite_in.json | 4 ++-- planner/core/testdata/integration_suite_out.json | 10 +++++----- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/planner/core/testdata/integration_suite_in.json b/planner/core/testdata/integration_suite_in.json index 79f8074e791cb..0c6c517d97b9b 100644 --- a/planner/core/testdata/integration_suite_in.json +++ b/planner/core/testdata/integration_suite_in.json @@ -1023,8 +1023,8 @@ "select c2 from t1 use index(idx2) where c1 = '0xfff' and c2 is null", "select c2 from t1 use index(idx2) where c1 >= '0xfff' and c2 is not null", "select c2 from t1 use index(idx2) where c1 >= '0xfff' and c2 is null", - "select b from t2 where b is not null", - "select b from t2 where b is null" + "select b from t2 use index(idx) where b is not null", + "select b from t2 use index(idx) where b is null" ] } ] diff --git a/planner/core/testdata/integration_suite_out.json b/planner/core/testdata/integration_suite_out.json index 144b7726c5ae0..9f9c1175a2147 100644 --- a/planner/core/testdata/integration_suite_out.json +++ b/planner/core/testdata/integration_suite_out.json @@ -7670,11 +7670,11 @@ ] }, { - "SQL": "select b from t2 where b is not null", + "SQL": "select b from t2 use index(idx) where b is not null", "Plan": [ - "TableReader 9990.00 root data:Selection", - "└─Selection 9990.00 cop[tikv] not(isnull(test.t2.b))", - " └─TableFullScan 10000.00 cop[tikv] table:t2 keep order:false, stats:pseudo" + "IndexLookUp 9990.00 root ", + "├─IndexFullScan(Build) 9990.00 cop[tikv] table:t2, index:idx(b) keep order:false, stats:pseudo", + "└─TableRowIDScan(Probe) 9990.00 cop[tikv] table:t2 keep order:false, stats:pseudo" ], "Result": [ "", @@ -7683,7 +7683,7 @@ ] }, { - "SQL": "select b from t2 where b is null", + "SQL": "select b from t2 use index(idx) where b is null", "Plan": [ "IndexLookUp 10.00 root ", "├─IndexRangeScan(Build) 10.00 cop[tikv] table:t2, index:idx(b) range:[NULL,NULL], keep order:false, stats:pseudo", From 25bfcf70ce33566e8602a8a17d9f5f0ffebb4acf Mon Sep 17 00:00:00 2001 From: xuyifan <675434007@qq.com> Date: Mon, 24 Oct 2022 18:09:48 +0800 Subject: [PATCH 20/21] add (col + 1) is (not) null test --- .../core/testdata/integration_suite_in.json | 2 ++ .../core/testdata/integration_suite_out.json | 26 +++++++++++++++++++ 2 files changed, 28 insertions(+) diff --git a/planner/core/testdata/integration_suite_in.json b/planner/core/testdata/integration_suite_in.json index 0c6c517d97b9b..36234169bb2df 100644 --- a/planner/core/testdata/integration_suite_in.json +++ b/planner/core/testdata/integration_suite_in.json @@ -1019,6 +1019,8 @@ "select count(1) from t1 where c1 = '0xfff' and c2 is null", "select count(1) from t1 where c1 >= '0xfff' and c2 is not null", "select count(1) from t1 where c1 >= '0xfff' and c2 is null", + "select count(1) from t1 where c1 = '0xfff' and (c2 + 1) is not null", + "select count(1) from t1 where c1 = '0xfff' and (c2 + 1) is null", "select c2 from t1 use index(idx2) where c1 = '0xfff' and c2 is not null", "select c2 from t1 use index(idx2) where c1 = '0xfff' and c2 is null", "select c2 from t1 use index(idx2) where c1 >= '0xfff' and c2 is not null", diff --git a/planner/core/testdata/integration_suite_out.json b/planner/core/testdata/integration_suite_out.json index 9f9c1175a2147..fcb9cc9fcc749 100644 --- a/planner/core/testdata/integration_suite_out.json +++ b/planner/core/testdata/integration_suite_out.json @@ -7615,6 +7615,32 @@ "1" ] }, + { + "SQL": "select count(1) from t1 where c1 = '0xfff' and (c2 + 1) is not null", + "Plan": [ + "StreamAgg 1.00 root funcs:count(1)->Column#5", + "└─IndexLookUp 8.00 root ", + " ├─IndexRangeScan(Build) 10.00 cop[tikv] table:t1, index:idx1(c1) range:[\"0xfff\",\"0xfff\"], keep order:false, stats:pseudo", + " └─Selection(Probe) 8.00 cop[tikv] not(isnull(plus(cast(test.t1.c2, double BINARY), 1)))", + " └─TableRowIDScan 10.00 cop[tikv] table:t1 keep order:false, stats:pseudo" + ], + "Result": [ + "3" + ] + }, + { + "SQL": "select count(1) from t1 where c1 = '0xfff' and (c2 + 1) is null", + "Plan": [ + "StreamAgg 1.00 root funcs:count(1)->Column#5", + "└─IndexLookUp 8.00 root ", + " ├─IndexRangeScan(Build) 10.00 cop[tikv] table:t1, index:idx1(c1) range:[\"0xfff\",\"0xfff\"], keep order:false, stats:pseudo", + " └─Selection(Probe) 8.00 cop[tikv] isnull(plus(cast(test.t1.c2, double BINARY), 1))", + " └─TableRowIDScan 10.00 cop[tikv] table:t1 keep order:false, stats:pseudo" + ], + "Result": [ + "1" + ] + }, { "SQL": "select c2 from t1 use index(idx2) where c1 = '0xfff' and c2 is not null", "Plan": [ From 5ecc79e823f369111dcb315634f9b008cb05d075 Mon Sep 17 00:00:00 2001 From: xuyifan <675434007@qq.com> Date: Mon, 24 Oct 2022 18:36:37 +0800 Subject: [PATCH 21/21] add plan cache test --- planner/core/integration_test.go | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/planner/core/integration_test.go b/planner/core/integration_test.go index 4eae4807aec17..5cc953f50e617 100644 --- a/planner/core/integration_test.go +++ b/planner/core/integration_test.go @@ -7731,4 +7731,22 @@ func TestNullConditionForPrefixIndex(t *testing.T) { tk.MustQuery("explain format='brief' " + tt).Check(testkit.Rows(output[i].Plan...)) tk.MustQuery(tt).Sort().Check(testkit.Rows(output[i].Result...)) } + + // test plan cache + tk.MustExec(`set tidb_enable_prepared_plan_cache=1`) + tk.MustExec("set @@tidb_enable_collect_execution_info=0") + tk.MustExec("prepare stmt from 'select count(1) from t1 where c1 = ? and c2 is not null'") + tk.MustExec("set @a = '0xfff'") + tk.MustQuery("execute stmt using @a").Check(testkit.Rows("3")) + tk.MustQuery("execute stmt using @a").Check(testkit.Rows("3")) + tk.MustQuery(`select @@last_plan_from_cache`).Check(testkit.Rows("1")) + tk.MustQuery("execute stmt using @a").Check(testkit.Rows("3")) + tkProcess := tk.Session().ShowProcess() + ps := []*util.ProcessInfo{tkProcess} + tk.Session().SetSessionManager(&testkit.MockSessionManager{PS: ps}) + tk.MustQuery(fmt.Sprintf("explain for connection %d", tkProcess.ID)).Check(testkit.Rows( + "StreamAgg_18 1.00 root funcs:count(Column#7)->Column#5", + "└─IndexReader_19 1.00 root index:StreamAgg_9", + " └─StreamAgg_9 1.00 cop[tikv] funcs:count(1)->Column#7", + " └─IndexRangeScan_17 99.90 cop[tikv] table:t1, index:idx2(c1, c2) range:[\"0xfff\" -inf,\"0xfff\" +inf], keep order:false, stats:pseudo")) }