Skip to content

Commit

Permalink
util/ranger: don't exclude start key for range from _ in like fun…
Browse files Browse the repository at this point in the history
…ction (#48984)

ref #48181, close #48983
  • Loading branch information
time-and-fate authored Nov 30, 2023
1 parent d617bf0 commit 39df07d
Show file tree
Hide file tree
Showing 6 changed files with 37 additions and 12 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -2195,7 +2195,7 @@
},
{
"SQL": "select a from t where c_str like 'abc_'",
"Best": "IndexReader(Index(t.c_d_e_str)[(\"abc\",\"abd\")]->Sel([like(test.t.c_str, abc_, 92)]))->Projection"
"Best": "IndexReader(Index(t.c_d_e_str)[[\"abc\",\"abd\")]->Sel([like(test.t.c_str, abc_, 92)]))->Projection"
},
{
"SQL": "select a from t where c_str like 'abc%af'",
Expand Down Expand Up @@ -2223,7 +2223,7 @@
},
{
"SQL": "select a from t where c_str like 'abc\\__'",
"Best": "IndexReader(Index(t.c_d_e_str)[(\"abc_\",\"abc`\")]->Sel([like(test.t.c_str, abc\\__, 92)]))->Projection"
"Best": "IndexReader(Index(t.c_d_e_str)[[\"abc_\",\"abc`\")]->Sel([like(test.t.c_str, abc\\__, 92)]))->Projection"
},
{
"SQL": "select a from t where c_str like 123",
Expand Down
5 changes: 1 addition & 4 deletions pkg/util/ranger/checker.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@ package ranger
import (
"github.com/pingcap/tidb/pkg/expression"
"github.com/pingcap/tidb/pkg/parser/ast"
"github.com/pingcap/tidb/pkg/parser/charset"
"github.com/pingcap/tidb/pkg/parser/mysql"
"github.com/pingcap/tidb/pkg/sessionctx"
"github.com/pingcap/tidb/pkg/types"
Expand Down Expand Up @@ -175,9 +174,7 @@ func (c *conditionChecker) checkLikeFunc(scalar *expression.ScalarFunction) (isA
// In tidb's implementation, for PAD SPACE collations, the trailing spaces are removed in the index key. So we are
// unable to distinguish 'xxx' from 'xxx ' by a single index range scan, and we may read more data than needed by
// the `like` function. Therefore, a Selection is needed to filter the data.
// Since all collations, except for binary, implemented in tidb are PAD SPACE collations for now, we use a simple
// collation != binary check here.
if collation != charset.CollationBin {
if isPadSpaceCollation(collation) {
likeFuncReserve = true
}

Expand Down
21 changes: 18 additions & 3 deletions pkg/util/ranger/points.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ import (
"github.com/pingcap/tidb/pkg/errno"
"github.com/pingcap/tidb/pkg/expression"
"github.com/pingcap/tidb/pkg/parser/ast"
"github.com/pingcap/tidb/pkg/parser/charset"
"github.com/pingcap/tidb/pkg/parser/mysql"
"github.com/pingcap/tidb/pkg/sessionctx"
"github.com/pingcap/tidb/pkg/types"
Expand Down Expand Up @@ -694,9 +695,15 @@ func (r *builder) newBuildFromPatternLike(expr *expression.ScalarFunction, prefi
break
} else if pattern[i] == '_' {
// Get the prefix, but exclude the prefix.
// e.g., "abc_x", the start point exclude "abc",
// because the string length is more than 3.
exclude = true
// e.g., "abc_x", the start point excludes "abc" because the string length is more than 3.
//
// However, like the similar check in (*conditionChecker).checkLikeFunc(), in tidb's implementation, for
// PAD SPACE collations, the trailing spaces are removed in the index key. So we are unable to distinguish
// 'xxx' from 'xxx ' by a single index range scan. If we exclude the start point for PAD SPACE collation,
// we will actually miss 'xxx ', which will cause wrong results.
if !isPadSpaceCollation(collation) {
exclude = true
}
isExactMatch = false
break
}
Expand Down Expand Up @@ -736,6 +743,14 @@ func (r *builder) newBuildFromPatternLike(expr *expression.ScalarFunction, prefi
return []*point{startPoint, endPoint}
}

// isPadSpaceCollation returns whether the collation is a PAD SPACE collation.
// Since all collations, except for binary, implemented in tidb are PAD SPACE collations for now, we use a simple
// collation != binary check here. We may also move it to collation related packages when NO PAD collations are
// implemented in the future.
func isPadSpaceCollation(collation string) bool {
return collation != charset.CollationBin
}

func (r *builder) buildFromNot(expr *expression.ScalarFunction, prefixLen int) []*point {
switch n := expr.FuncName.L; n {
case ast.IsTruthWithoutNull:
Expand Down
2 changes: 1 addition & 1 deletion pkg/util/ranger/ranger_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1114,7 +1114,7 @@ create table t(
exprStr: "a LIKE 'abc_'",
accessConds: "[like(test.t.a, abc_, 92)]",
filterConds: "[like(test.t.a, abc_, 92)]",
resultStr: "[(\"abc\",\"abd\")]",
resultStr: "[[\"abc\",\"abd\")]",
},
{
indexPos: 0,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -260,7 +260,7 @@ a
set @@tidb_max_chunk_size = default;
drop table if exists t1, t2;
create table t1(a varchar(20) collate utf8mb4_bin, index ia(a));
insert into t1 value('测试'),('测试 ');
insert into t1 value('测试'),('测试 '),('xxx ');
explain format = brief select *,length(a) from t1 where a like '测试 %';
id estRows task access object operator info
Projection 250.00 root planner__core__issuetest__planner_issue.t1.a, length(planner__core__issuetest__planner_issue.t1.a)->Column#3
Expand All @@ -281,6 +281,16 @@ a length(a)
select *,length(a) from t1 where a like '测试';
a length(a)
测试 6
explain format = brief select * from t1 use index (ia) where a like 'xxx_';
id estRows task access object operator info
Projection 250.00 root planner__core__issuetest__planner_issue.t1.a
└─UnionScan 250.00 root like(planner__core__issuetest__planner_issue.t1.a, "xxx_", 92)
└─IndexReader 250.00 root index:Selection
└─Selection 250.00 cop[tikv] like(planner__core__issuetest__planner_issue.t1.a, "xxx_", 92)
└─IndexRangeScan 250.00 cop[tikv] table:t1, index:ia(a) range:["xxx","xxy"), keep order:false, stats:pseudo
select * from t1 use index (ia) where a like 'xxx_';
a
xxx
create table t2(a varchar(20) collate gbk_chinese_ci, index ia(a));
insert into t2 value('测试'),('测试 ');
explain format = brief select *,length(a) from t2 where a like '测试 %';
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -153,13 +153,16 @@ select a from (select 100 as a, 100 as b union all select * from t) t where b !=
set @@tidb_max_chunk_size = default;

# https://github.com/pingcap/tidb/issues/48821
# https://github.com/pingcap/tidb/issues/48983
drop table if exists t1, t2;
create table t1(a varchar(20) collate utf8mb4_bin, index ia(a));
insert into t1 value('测试'),('测试 ');
insert into t1 value('测试'),('测试 '),('xxx ');
explain format = brief select *,length(a) from t1 where a like '测试 %';
explain format = brief select *,length(a) from t1 where a like '测试';
select *,length(a) from t1 where a like '测试 %';
select *,length(a) from t1 where a like '测试';
explain format = brief select * from t1 use index (ia) where a like 'xxx_';
select * from t1 use index (ia) where a like 'xxx_';
create table t2(a varchar(20) collate gbk_chinese_ci, index ia(a));
insert into t2 value('测试'),('测试 ');
explain format = brief select *,length(a) from t2 where a like '测试 %';
Expand Down

0 comments on commit 39df07d

Please sign in to comment.