Skip to content

Commit

Permalink
planner: consider prefix index column length in skyline pruning (#27527)
Browse files Browse the repository at this point in the history
  • Loading branch information
xuyifangreeneyes authored Nov 26, 2021
1 parent a9f161d commit 126dbc8
Show file tree
Hide file tree
Showing 6 changed files with 233 additions and 43 deletions.
51 changes: 15 additions & 36 deletions planner/core/find_best_task.go
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,6 @@ import (
"github.com/pingcap/tidb/util/ranger"
"github.com/pingcap/tidb/util/set"
"go.uber.org/zap"
"golang.org/x/tools/container/intsets"
)

const (
Expand Down Expand Up @@ -417,30 +416,10 @@ func (ds *DataSource) tryToGetDualTask() (task, error) {

// candidatePath is used to maintain required info for skyline pruning.
type candidatePath struct {
path *util.AccessPath
accessCondsColSet *intsets.Sparse // accessCondsColSet is the set of columns that occurred in the access conditions.
indexFiltersColSet *intsets.Sparse // indexFiltersColSet is the set of columns that occurred in the index filters.
isMatchProp bool
}

// compareColumnSet compares the two sets. The last return value indicates
// whether they are comparable; it is false when both sets have columns that do not occur in the other.
// When the second return value is true, the value of first:
// (1) -1 means that `l` is a strict subset of `r`;
// (2) 0 means that `l` equals to `r`;
// (3) 1 means that `l` is a strict superset of `r`.
func compareColumnSet(l, r *intsets.Sparse) (int, bool) {
lLen, rLen := l.Len(), r.Len()
if lLen < rLen {
// -1 is meaningful only when l.SubsetOf(r) is true.
return -1, l.SubsetOf(r)
}
if lLen == rLen {
// 0 is meaningful only when l.SubsetOf(r) is true.
return 0, l.SubsetOf(r)
}
// 1 is meaningful only when r.SubsetOf(l) is true.
return 1, r.SubsetOf(l)
path *util.AccessPath
accessCondsColMap util.Col2Len // accessCondsColMap maps Column.UniqueID to column length for the columns in AccessConds.
indexCondsColMap util.Col2Len // indexCondsColMap maps Column.UniqueID to column length for the columns in AccessConds and indexFilters.
isMatchProp bool
}

func compareBool(l, r bool) int {
Expand All @@ -456,21 +435,21 @@ func compareBool(l, r bool) int {
// compareIndexBack compares lhs and rhs on the "needs to read the table after the index scan" dimension.
// It starts from compareBool over path.IsSingleScan; when that yields 0 and lhs is not a single scan,
// the column information recorded on the two candidates decides the result instead.
// The second return value reports whether lhs and rhs are comparable on this dimension.
func compareIndexBack(lhs, rhs *candidatePath) (int, bool) {
result := compareBool(lhs.path.IsSingleScan, rhs.path.IsSingleScan)
if result == 0 && !lhs.path.IsSingleScan {
// if both lhs and rhs need to access table after IndexScan, we use the set of columns that occurred in IndexFilters
// if both lhs and rhs need to access table after IndexScan, we utilize the set of columns that occurred in AccessConds and IndexFilters
// to compare how many table rows will be accessed.
return compareColumnSet(lhs.indexFiltersColSet, rhs.indexFiltersColSet)
return util.CompareCol2Len(lhs.indexCondsColMap, rhs.indexCondsColMap)
}
// Otherwise the IsSingleScan comparison alone is the answer and is always comparable.
return result, true
}

// compareCandidates is the core of skyline pruning. It compares the two candidate paths on three dimensions:
// (1): the set of columns that occurred in the access condition,
// (2): whether or not it matches the physical property
// (3): does it require a double scan.
// (2): does it require a double scan,
// (3): whether or not it matches the physical property.
// If `x` is not worse than `y` at all factors,
// and there exists one factor that `x` is better than `y`, then `x` is better than `y`.
func compareCandidates(lhs, rhs *candidatePath) int {
setsResult, comparable := compareColumnSet(lhs.accessCondsColSet, rhs.accessCondsColSet)
accessResult, comparable := util.CompareCol2Len(lhs.accessCondsColMap, rhs.accessCondsColMap)
if !comparable {
return 0
}
Expand All @@ -479,11 +458,11 @@ func compareCandidates(lhs, rhs *candidatePath) int {
return 0
}
matchResult := compareBool(lhs.isMatchProp, rhs.isMatchProp)
sum := setsResult + scanResult + matchResult
if setsResult >= 0 && scanResult >= 0 && matchResult >= 0 && sum > 0 {
sum := accessResult + scanResult + matchResult
if accessResult >= 0 && scanResult >= 0 && matchResult >= 0 && sum > 0 {
return 1
}
if setsResult <= 0 && scanResult <= 0 && matchResult <= 0 && sum < 0 {
if accessResult <= 0 && scanResult <= 0 && matchResult <= 0 && sum < 0 {
return -1
}
return 0
Expand Down Expand Up @@ -543,15 +522,15 @@ func (ds *DataSource) isMatchProp(path *util.AccessPath, prop *property.Physical
// getTableCandidate builds the skyline-pruning candidate for a table path: it records whether the path
// matches prop and the columns that occur in path.AccessConds.
func (ds *DataSource) getTableCandidate(path *util.AccessPath, prop *property.PhysicalProperty) *candidatePath {
candidate := &candidatePath{path: path}
candidate.isMatchProp = ds.isMatchProp(path, prop)
candidate.accessCondsColSet = expression.ExtractColumnSet(path.AccessConds)
// nil index columns/lengths make ExtractCol2Len record every column with types.UnspecifiedLength.
candidate.accessCondsColMap = util.ExtractCol2Len(path.AccessConds, nil, nil)
return candidate
}

// getIndexCandidate builds the skyline-pruning candidate for an index path: it records whether the path
// matches prop and the column information extracted from path.AccessConds and path.IndexFilters.
func (ds *DataSource) getIndexCandidate(path *util.AccessPath, prop *property.PhysicalProperty) *candidatePath {
candidate := &candidatePath{path: path}
candidate.isMatchProp = ds.isMatchProp(path, prop)
candidate.accessCondsColSet = expression.ExtractColumnSet(path.AccessConds)
candidate.indexFiltersColSet = expression.ExtractColumnSet(path.IndexFilters)
// Access conditions are resolved against IdxCols/IdxColLens.
candidate.accessCondsColMap = util.ExtractCol2Len(path.AccessConds, path.IdxCols, path.IdxColLens)
// Access conditions plus index filters are resolved against FullIdxCols/FullIdxColLens.
// NOTE(review): append(path.AccessConds, ...) may write into the backing array of path.AccessConds when
// it has spare capacity; confirm nothing else shares that array, or append onto a fresh slice instead.
candidate.indexCondsColMap = util.ExtractCol2Len(append(path.AccessConds, path.IndexFilters...), path.FullIdxCols, path.FullIdxColLens)
return candidate
}

Expand Down
9 changes: 9 additions & 0 deletions planner/core/integration_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -4859,7 +4859,16 @@ func (s *testIntegrationSuite) TestIssues29711(c *C) {
" └─TopN(Probe) 10.00 cop[tikv] test.t29711.a, offset:0, count:10",
" └─TableRowIDScan 10000.00 cop[tikv] table:t29711 keep order:false, stats:pseudo",
))
}

// TestIssue27313 plans a query on a table that has a prefix index idx1(a(2), b) and a full index
// idx2(a), then checks that planning produced no warnings.
func (s *testIntegrationSuite) TestIssue27313(c *C) {
	tk := testkit.NewTestKit(c, s.store)
	statements := []string{
		"use test",
		"drop table if exists t",
		"create table t(a varchar(100), b int, c int, index idx1(a(2), b), index idx2(a))",
		"explain format = 'verbose' select * from t where a = 'abcdefghijk' and b > 4",
	}
	for _, stmt := range statements {
		tk.MustExec(stmt)
	}
	// no warning indicates that idx2 is not pruned by idx1.
	tk.MustQuery("show warnings").Check(testkit.Rows())
}

func (s *testIntegrationSuite) TestIssue30094(c *C) {
Expand Down
13 changes: 6 additions & 7 deletions planner/core/stats.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,6 @@ import (
"github.com/pingcap/tidb/util/logutil"
"github.com/pingcap/tidb/util/ranger"
"go.uber.org/zap"
"golang.org/x/tools/container/intsets"
)

func (p *basePhysicalPlan) StatsCount() float64 {
Expand Down Expand Up @@ -297,9 +296,9 @@ func (ds *DataSource) derivePathStatsAndTryHeuristics() error {
}
}
if selected == nil && len(uniqueIdxsWithDoubleScan) > 0 {
uniqueIdxColumnSets := make([]*intsets.Sparse, 0, len(uniqueIdxsWithDoubleScan))
uniqueIdxAccessCols := make([]util.Col2Len, 0, len(uniqueIdxsWithDoubleScan))
for _, uniqueIdx := range uniqueIdxsWithDoubleScan {
uniqueIdxColumnSets = append(uniqueIdxColumnSets, expression.ExtractColumnSet(uniqueIdx.AccessConds))
uniqueIdxAccessCols = append(uniqueIdxAccessCols, uniqueIdx.GetCol2LenFromAccessConds())
// Find the unique index with the minimal number of ranges as `uniqueBest`.
if uniqueBest == nil || len(uniqueIdx.Ranges) < len(uniqueBest.Ranges) {
uniqueBest = uniqueIdx
Expand All @@ -314,10 +313,10 @@ func (ds *DataSource) derivePathStatsAndTryHeuristics() error {
// Hence, for each index in `singleScanIdxs`, we check whether it is better than some index in `uniqueIdxsWithDoubleScan`.
// If yes, the index is a refined one. We find the refined index with the minimal number of ranges as `refineBest`.
for _, singleScanIdx := range singleScanIdxs {
columnSet := expression.ExtractColumnSet(singleScanIdx.AccessConds)
for _, uniqueIdxColumnSet := range uniqueIdxColumnSets {
setsResult, comparable := compareColumnSet(columnSet, uniqueIdxColumnSet)
if comparable && setsResult == 1 {
col2Len := singleScanIdx.GetCol2LenFromAccessConds()
for _, uniqueIdxCol2Len := range uniqueIdxAccessCols {
accessResult, comparable := util.CompareCol2Len(col2Len, uniqueIdxCol2Len)
if comparable && accessResult == 1 {
if refinedBest == nil || len(singleScanIdx.Ranges) < len(refinedBest.Ranges) {
refinedBest = singleScanIdx
}
Expand Down
27 changes: 27 additions & 0 deletions planner/util/main_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
// Copyright 2021 PingCAP, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package util

import (
"testing"

"github.com/pingcap/tidb/util/testbridge"
"go.uber.org/goleak"
)

// TestMain is the entry point for this package's tests.
func TestMain(m *testing.M) {
// NOTE(review): presumably makes the go-check command line flags acceptable to `go test`;
// confirm against util/testbridge.
testbridge.WorkaroundGoCheckFlags()
// Hands the test run to goleak, which is documented to report goroutines that are still running
// once the package tests have finished.
goleak.VerifyTestMain(m)
}
104 changes: 104 additions & 0 deletions planner/util/path.go
Original file line number Diff line number Diff line change
Expand Up @@ -181,3 +181,107 @@ func (path *AccessPath) OnlyPointRange(sctx sessionctx.Context) bool {
}
return noIntervalRange && !haveNullVal
}

// Col2Len maps expression.Column.UniqueID to column length.
// types.UnspecifiedLength stands for the full column; other values come from the index column
// lengths and may be prefix lengths.
type Col2Len map[int64]int

// ExtractCol2Len walks every expression in exprs and gathers the columns it references together
// with their lengths.
//
// When idxCols is nil, each referenced column is recorded with the full length. Otherwise only
// columns that match an entry of idxCols are recorded, using the corresponding entry of
// idxColLens (which may be a prefix length).
func ExtractCol2Len(exprs []expression.Expression, idxCols []*expression.Column, idxColLens []int) Col2Len {
	collected := make(Col2Len, len(idxCols))
	for i := range exprs {
		extractCol2LenFromExpr(exprs[i], idxCols, idxColLens, collected)
	}
	return collected
}

// extractCol2LenFromExpr records into col2Len the columns referenced by expr.
// Scalar functions are descended into argument by argument; any expression that is neither a
// column nor a scalar function is ignored.
func extractCol2LenFromExpr(expr expression.Expression, idxCols []*expression.Column, idxColLens []int, col2Len Col2Len) {
	if fn, isFunc := expr.(*expression.ScalarFunction); isFunc {
		for _, arg := range fn.GetArgs() {
			extractCol2LenFromExpr(arg, idxCols, idxColLens, col2Len)
		}
		return
	}

	column, isColumn := expr.(*expression.Column)
	if !isColumn {
		return
	}

	// Without index columns the column is a table column and is recorded with the full length.
	if idxCols == nil {
		col2Len[column.UniqueID] = types.UnspecifiedLength
		return
	}

	// Only the first matching index column contributes its length.
	for pos := range idxCols {
		idxCol := idxCols[pos]
		if idxCol == nil || !column.EqualByExprAndID(nil, idxCol) {
			continue
		}
		col2Len[column.UniqueID] = idxColLens[pos]
		return
	}
}

// compareLength orders two column lengths, treating types.UnspecifiedLength as longer than any
// other length. It returns:
// (1) -1 if l is shorter than r;
// (2) 0 if l equals r;
// (3) 1 if l is longer than r.
func compareLength(l, r int) int {
	switch {
	case l == r:
		return 0
	case l == types.UnspecifiedLength:
		return 1
	case r == types.UnspecifiedLength:
		return -1
	case l > r:
		return 1
	default:
		return -1
	}
}

// dominate returns true if every column of c2 also exists in c1 and, for each of them, c2's
// column length is no longer than c1's column length.
func (c1 Col2Len) dominate(c2 Col2Len) bool {
	// c1 cannot contain every column of c2 if it has fewer entries.
	if len(c1) < len(c2) {
		return false
	}
	for id, otherLen := range c2 {
		ownLen, present := c1[id]
		if !present {
			return false
		}
		if compareLength(otherLen, ownLen) == 1 {
			return false
		}
	}
	return true
}

// CompareCol2Len compares two Col2Len maps. The second return value reports whether the maps are
// comparable; when it is false the first return value is 0 and carries no meaning.
// When the maps are comparable, the first return value is:
// (1) -1 if c1 is worse than c2;
// (2) 0 if c1 equals c2;
// (3) 1 if c1 is better than c2.
func CompareCol2Len(c1, c2 Col2Len) (int, bool) {
	size1, size2 := len(c1), len(c2)
	switch {
	case size1 > size2:
		if !c1.dominate(c2) {
			return 0, false
		}
		return 1, true
	case size1 < size2:
		if !c2.dominate(c1) {
			return 0, false
		}
		return -1, true
	}

	// Same number of columns: the maps are equal only if every column matches with the same length.
	// Same columns with a different length on some column are regarded as incomparable.
	for id, want := range c2 {
		if got, present := c1[id]; !present || got != want {
			return 0, false
		}
	}
	return 0, true
}

// GetCol2LenFromAccessConds returns the columns, with their lengths, that occur in path.AccessConds.
// Table paths are extracted without index columns; other paths use path.IdxCols and path.IdxColLens.
func (path *AccessPath) GetCol2LenFromAccessConds() Col2Len {
	var (
		cols []*expression.Column
		lens []int
	)
	if !path.IsTablePath() {
		cols, lens = path.IdxCols, path.IdxColLens
	}
	return ExtractCol2Len(path.AccessConds, cols, lens)
}
72 changes: 72 additions & 0 deletions planner/util/path_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
// Copyright 2021 PingCAP, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package util

import (
"testing"

"github.com/stretchr/testify/require"
)

// TestCompareCol2Len feeds pairs of Col2Len maps to CompareCol2Len and checks both the comparison
// result and the comparable flag.
func TestCompareCol2Len(t *testing.T) {
	type testCase struct {
		c1         Col2Len
		c2         Col2Len
		res        int
		comparable bool
	}
	cases := []testCase{
		{c1: Col2Len{1: -1, 2: -1, 3: -1}, c2: Col2Len{1: -1, 2: 10}, res: 1, comparable: true},
		{c1: Col2Len{1: 5}, c2: Col2Len{1: 10, 2: -1}, res: -1, comparable: true},
		{c1: Col2Len{1: -1, 2: -1}, c2: Col2Len{1: -1, 2: 5, 3: -1}, res: 0, comparable: false},
		{c1: Col2Len{1: -1, 2: 10}, c2: Col2Len{1: -1, 2: 5, 3: -1}, res: 0, comparable: false},
		{c1: Col2Len{1: -1, 2: 10}, c2: Col2Len{1: -1, 2: 10}, res: 0, comparable: true},
		{c1: Col2Len{1: -1, 2: -1}, c2: Col2Len{1: -1, 2: 10}, res: 0, comparable: false},
	}
	for i := range cases {
		tc := &cases[i]
		gotRes, gotComparable := CompareCol2Len(tc.c1, tc.c2)
		require.Equal(t, tc.res, gotRes)
		require.Equal(t, tc.comparable, gotComparable)
	}
}

0 comments on commit 126dbc8

Please sign in to comment.