Skip to content

Commit

Permalink
util/ranger: don't exclude start key for range from _ in like fun…
Browse files Browse the repository at this point in the history
…ction | tidb-test=pr/2257 (#48984) (#49064)

ref #48181, close #48983
  • Loading branch information
ti-chi-bot committed Dec 8, 2023
1 parent b9635d2 commit 66f9635
Show file tree
Hide file tree
Showing 5 changed files with 60 additions and 6 deletions.
29 changes: 29 additions & 0 deletions cmd/explaintest/r/planner_issue.result
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
drop table if exists t1, t2;
create table t1(a varchar(20) collate utf8mb4_bin, index ia(a));
insert into t1 value('测试'),('测试 '),('xxx ');
explain format = brief select *,length(a) from t1 where a like '测试 %';
id estRows task access object operator info
Projection 250.00 root test.t1.a, length(test.t1.a)->Column#3
└─IndexReader 250.00 root index:IndexRangeScan
└─IndexRangeScan 250.00 cop[tikv] table:t1, index:ia(a) range:["测试 ","测试!"), keep order:false, stats:pseudo
explain format = brief select *,length(a) from t1 where a like '测试';
id estRows task access object operator info
Projection 10.00 root test.t1.a, length(test.t1.a)->Column#3
└─IndexReader 10.00 root index:IndexRangeScan
└─IndexRangeScan 10.00 cop[tikv] table:t1, index:ia(a) range:["测试","测试"], keep order:false, stats:pseudo
select *,length(a) from t1 where a like '测试 %';
a length(a)
测试 6
测试 8
select *,length(a) from t1 where a like '测试';
a length(a)
测试 6
测试 8
explain format = brief select * from t1 use index (ia) where a like 'xxx_';
id estRows task access object operator info
IndexReader 250.00 root index:Selection
└─Selection 250.00 cop[tikv] like(test.t1.a, "xxx_", 92)
└─IndexRangeScan 250.00 cop[tikv] table:t1, index:ia(a) range:["xxx","xxy"), keep order:false, stats:pseudo
select * from t1 use index (ia) where a like 'xxx_';
a
xxx
10 changes: 10 additions & 0 deletions cmd/explaintest/t/planner_issue.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
# https://github.com/pingcap/tidb/issues/48983
drop table if exists t1, t2;
create table t1(a varchar(20) collate utf8mb4_bin, index ia(a));
insert into t1 value('测试'),('测试 '),('xxx ');
explain format = brief select *,length(a) from t1 where a like '测试 %';
explain format = brief select *,length(a) from t1 where a like '测试';
select *,length(a) from t1 where a like '测试 %';
select *,length(a) from t1 where a like '测试';
explain format = brief select * from t1 use index (ia) where a like 'xxx_';
select * from t1 use index (ia) where a like 'xxx_';
4 changes: 2 additions & 2 deletions planner/core/testdata/plan_suite_out.json
Original file line number Diff line number Diff line change
Expand Up @@ -1188,7 +1188,7 @@
},
{
"SQL": "select a from t where c_str like 'abc_'",
"Best": "IndexReader(Index(t.c_d_e_str)[(\"abc\",\"abd\")]->Sel([like(test.t.c_str, abc_, 92)]))->Projection"
"Best": "IndexReader(Index(t.c_d_e_str)[[\"abc\",\"abd\")]->Sel([like(test.t.c_str, abc_, 92)]))->Projection"
},
{
"SQL": "select a from t where c_str like 'abc%af'",
Expand Down Expand Up @@ -1216,7 +1216,7 @@
},
{
"SQL": "select a from t where c_str like 'abc\\__'",
"Best": "IndexReader(Index(t.c_d_e_str)[(\"abc_\",\"abc`\")]->Sel([like(test.t.c_str, abc\\__, 92)]))->Projection"
"Best": "IndexReader(Index(t.c_d_e_str)[[\"abc_\",\"abc`\")]->Sel([like(test.t.c_str, abc\\__, 92)]))->Projection"
},
{
"SQL": "select a from t where c_str like 123",
Expand Down
21 changes: 18 additions & 3 deletions util/ranger/points.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ import (
"github.com/pingcap/tidb/errno"
"github.com/pingcap/tidb/expression"
"github.com/pingcap/tidb/parser/ast"
"github.com/pingcap/tidb/parser/charset"
"github.com/pingcap/tidb/parser/mysql"
"github.com/pingcap/tidb/sessionctx/stmtctx"
"github.com/pingcap/tidb/types"
Expand Down Expand Up @@ -677,9 +678,15 @@ func (r *builder) newBuildFromPatternLike(expr *expression.ScalarFunction) []*po
break
} else if pattern[i] == '_' {
// Get the prefix, but exclude the prefix.
// e.g., "abc_x", the start point exclude "abc",
// because the string length is more than 3.
exclude = true
// e.g., "abc_x", the start point excludes "abc" because the string length is more than 3.
//
// However, like the similar check in (*conditionChecker).checkLikeFunc(), in tidb's implementation, for
// PAD SPACE collations, the trailing spaces are removed in the index key. So we are unable to distinguish
// 'xxx' from 'xxx ' by a single index range scan. If we exclude the start point for PAD SPACE collation,
// we will actually miss 'xxx ', which will cause wrong results.
if !isPadSpaceCollation(collation) {
exclude = true
}
isExactMatch = false
break
}
Expand Down Expand Up @@ -714,6 +721,14 @@ func (r *builder) newBuildFromPatternLike(expr *expression.ScalarFunction) []*po
return []*point{startPoint, endPoint}
}

// isPadSpaceCollation returns whether the collation is a PAD SPACE collation.
// Since all collations, except for binary, implemented in tidb are PAD SPACE collations for now, we use a simple
// collation != binary check here. We may also move it to collation related packages when NO PAD collations are
// implemented in the future.
func isPadSpaceCollation(collation string) bool {
return collation != charset.CollationBin
}

func (r *builder) buildFromNot(expr *expression.ScalarFunction) []*point {
switch n := expr.FuncName.L; n {
case ast.IsTruthWithoutNull:
Expand Down
2 changes: 1 addition & 1 deletion util/ranger/ranger_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1424,7 +1424,7 @@ create table t(
exprStr: "a LIKE 'abc_'",
accessConds: "[like(test.t.a, abc_, 92)]",
filterConds: "[like(test.t.a, abc_, 92)]",
resultStr: "[(\"abc\",\"abd\")]",
resultStr: "[[\"abc\",\"abd\")]",
},
{
indexPos: 0,
Expand Down

0 comments on commit 66f9635

Please sign in to comment.