Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

util/ranger: don't exclude start key for range from _ in like function | tidb-test=pr/2254 #48984

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -2195,7 +2195,7 @@
},
{
"SQL": "select a from t where c_str like 'abc_'",
"Best": "IndexReader(Index(t.c_d_e_str)[(\"abc\",\"abd\")]->Sel([like(test.t.c_str, abc_, 92)]))->Projection"
"Best": "IndexReader(Index(t.c_d_e_str)[[\"abc\",\"abd\")]->Sel([like(test.t.c_str, abc_, 92)]))->Projection"
},
{
"SQL": "select a from t where c_str like 'abc%af'",
Expand Down Expand Up @@ -2223,7 +2223,7 @@
},
{
"SQL": "select a from t where c_str like 'abc\\__'",
"Best": "IndexReader(Index(t.c_d_e_str)[(\"abc_\",\"abc`\")]->Sel([like(test.t.c_str, abc\\__, 92)]))->Projection"
"Best": "IndexReader(Index(t.c_d_e_str)[[\"abc_\",\"abc`\")]->Sel([like(test.t.c_str, abc\\__, 92)]))->Projection"
},
{
"SQL": "select a from t where c_str like 123",
Expand Down
5 changes: 1 addition & 4 deletions pkg/util/ranger/checker.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@ package ranger
import (
"github.com/pingcap/tidb/pkg/expression"
"github.com/pingcap/tidb/pkg/parser/ast"
"github.com/pingcap/tidb/pkg/parser/charset"
"github.com/pingcap/tidb/pkg/parser/mysql"
"github.com/pingcap/tidb/pkg/sessionctx"
"github.com/pingcap/tidb/pkg/types"
Expand Down Expand Up @@ -175,9 +174,7 @@ func (c *conditionChecker) checkLikeFunc(scalar *expression.ScalarFunction) (isA
// In tidb's implementation, for PAD SPACE collations, the trailing spaces are removed in the index key. So we are
// unable to distinguish 'xxx' from 'xxx ' by a single index range scan, and we may read more data than needed by
// the `like` function. Therefore, a Selection is needed to filter the data.
// Since all collations, except for binary, implemented in tidb are PAD SPACE collations for now, we use a simple
// collation != binary check here.
if collation != charset.CollationBin {
if isPadSpaceCollation(collation) {
likeFuncReserve = true
}

Expand Down
21 changes: 18 additions & 3 deletions pkg/util/ranger/points.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ import (
"github.com/pingcap/tidb/pkg/errno"
"github.com/pingcap/tidb/pkg/expression"
"github.com/pingcap/tidb/pkg/parser/ast"
"github.com/pingcap/tidb/pkg/parser/charset"
"github.com/pingcap/tidb/pkg/parser/mysql"
"github.com/pingcap/tidb/pkg/sessionctx"
"github.com/pingcap/tidb/pkg/types"
Expand Down Expand Up @@ -694,9 +695,15 @@ func (r *builder) newBuildFromPatternLike(expr *expression.ScalarFunction, prefi
break
} else if pattern[i] == '_' {
// Get the prefix, but exclude the prefix.
// e.g., "abc_x", the start point exclude "abc",
// because the string length is more than 3.
exclude = true
// e.g., "abc_x", the start point excludes "abc" because the string length is more than 3.
//
// However, like the similar check in (*conditionChecker).checkLikeFunc(), in tidb's implementation, for
// PAD SPACE collations, the trailing spaces are removed in the index key. So we are unable to distinguish
// 'xxx' from 'xxx ' by a single index range scan. If we exclude the start point for PAD SPACE collation,
// we will actually miss 'xxx ', which will cause wrong results.
if !isPadSpaceCollation(collation) {
exclude = true
}
isExactMatch = false
break
}
Expand Down Expand Up @@ -736,6 +743,14 @@ func (r *builder) newBuildFromPatternLike(expr *expression.ScalarFunction, prefi
return []*point{startPoint, endPoint}
}

// isPadSpaceCollation returns whether the collation is a PAD SPACE collation.
// Since all collations, except for binary, implemented in tidb are PAD SPACE collations for now, we use a simple
// collation != binary check here. We may also move it to collation related packages when NO PAD collations are
// implemented in the future.
func isPadSpaceCollation(collation string) bool {
return collation != charset.CollationBin
}

func (r *builder) buildFromNot(expr *expression.ScalarFunction, prefixLen int) []*point {
switch n := expr.FuncName.L; n {
case ast.IsTruthWithoutNull:
Expand Down
2 changes: 1 addition & 1 deletion pkg/util/ranger/ranger_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1114,7 +1114,7 @@ create table t(
exprStr: "a LIKE 'abc_'",
accessConds: "[like(test.t.a, abc_, 92)]",
filterConds: "[like(test.t.a, abc_, 92)]",
resultStr: "[(\"abc\",\"abd\")]",
resultStr: "[[\"abc\",\"abd\")]",
},
{
indexPos: 0,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -260,7 +260,7 @@ a
set @@tidb_max_chunk_size = default;
drop table if exists t1, t2;
create table t1(a varchar(20) collate utf8mb4_bin, index ia(a));
insert into t1 value('测试'),('测试 ');
insert into t1 value('测试'),('测试 '),('xxx ');
explain format = brief select *,length(a) from t1 where a like '测试 %';
id estRows task access object operator info
Projection 250.00 root planner__core__issuetest__planner_issue.t1.a, length(planner__core__issuetest__planner_issue.t1.a)->Column#3
Expand All @@ -281,6 +281,16 @@ a length(a)
select *,length(a) from t1 where a like '测试';
a length(a)
测试 6
explain format = brief select * from t1 use index (ia) where a like 'xxx_';
id estRows task access object operator info
Projection 250.00 root planner__core__issuetest__planner_issue.t1.a
└─UnionScan 250.00 root like(planner__core__issuetest__planner_issue.t1.a, "xxx_", 92)
└─IndexReader 250.00 root index:Selection
└─Selection 250.00 cop[tikv] like(planner__core__issuetest__planner_issue.t1.a, "xxx_", 92)
└─IndexRangeScan 250.00 cop[tikv] table:t1, index:ia(a) range:["xxx","xxy"), keep order:false, stats:pseudo
select * from t1 use index (ia) where a like 'xxx_';
a
xxx
create table t2(a varchar(20) collate gbk_chinese_ci, index ia(a));
insert into t2 value('测试'),('测试 ');
explain format = brief select *,length(a) from t2 where a like '测试 %';
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -153,13 +153,16 @@ select a from (select 100 as a, 100 as b union all select * from t) t where b !=
set @@tidb_max_chunk_size = default;

# https://github.com/pingcap/tidb/issues/48821
# https://github.com/pingcap/tidb/issues/48983
drop table if exists t1, t2;
create table t1(a varchar(20) collate utf8mb4_bin, index ia(a));
insert into t1 value('测试'),('测试 ');
insert into t1 value('测试'),('测试 '),('xxx ');
explain format = brief select *,length(a) from t1 where a like '测试 %';
explain format = brief select *,length(a) from t1 where a like '测试';
select *,length(a) from t1 where a like '测试 %';
select *,length(a) from t1 where a like '测试';
explain format = brief select * from t1 use index (ia) where a like 'xxx_';
select * from t1 use index (ia) where a like 'xxx_';
create table t2(a varchar(20) collate gbk_chinese_ci, index ia(a));
insert into t2 value('测试'),('测试 ');
explain format = brief select *,length(a) from t2 where a like '测试 %';
Expand Down