pingcap · ti-chi-bot · Nov 30, 2023 · Nov 28, 2023 · Nov 28, 2023 · Nov 28, 2023
diff --git a/pkg/planner/core/casetest/physicalplantest/testdata/plan_suite_out.json b/pkg/planner/core/casetest/physicalplantest/testdata/plan_suite_out.json
@@ -2195,7 +2195,7 @@
       },
       {
         "SQL": "select a from t where c_str like 'abc_'",
-        "Best": "IndexReader(Index(t.c_d_e_str)[(\"abc\",\"abd\")]->Sel([like(test.t.c_str, abc_, 92)]))->Projection"
+        "Best": "IndexReader(Index(t.c_d_e_str)[[\"abc\",\"abd\")]->Sel([like(test.t.c_str, abc_, 92)]))->Projection"
       },
       {
         "SQL": "select a from t where c_str like 'abc%af'",
@@ -2223,7 +2223,7 @@
       },
       {
         "SQL": "select a from t where c_str like 'abc\\__'",
-        "Best": "IndexReader(Index(t.c_d_e_str)[(\"abc_\",\"abc`\")]->Sel([like(test.t.c_str, abc\\__, 92)]))->Projection"
+        "Best": "IndexReader(Index(t.c_d_e_str)[[\"abc_\",\"abc`\")]->Sel([like(test.t.c_str, abc\\__, 92)]))->Projection"
       },
       {
         "SQL": "select a from t where c_str like 123",

diff --git a/pkg/util/ranger/checker.go b/pkg/util/ranger/checker.go
@@ -17,7 +17,6 @@ package ranger
 import (
 	"github.com/pingcap/tidb/pkg/expression"
 	"github.com/pingcap/tidb/pkg/parser/ast"
-	"github.com/pingcap/tidb/pkg/parser/charset"
 	"github.com/pingcap/tidb/pkg/parser/mysql"
 	"github.com/pingcap/tidb/pkg/sessionctx"
 	"github.com/pingcap/tidb/pkg/types"
@@ -175,9 +174,7 @@ func (c *conditionChecker) checkLikeFunc(scalar *expression.ScalarFunction) (isA
 	// In tidb's implementation, for PAD SPACE collations, the trailing spaces are removed in the index key. So we are
 	// unable to distinguish 'xxx' from 'xxx   ' by a single index range scan, and we may read more data than needed by
 	// the `like` function. Therefore, a Selection is needed to filter the data.
-	// Since all collations, except for binary, implemented in tidb are PAD SPACE collations for now, we use a simple
-	// collation != binary check here.
-	if collation != charset.CollationBin {
+	if isPadSpaceCollation(collation) {
 		likeFuncReserve = true
 	}
 

diff --git a/pkg/util/ranger/points.go b/pkg/util/ranger/points.go
@@ -24,6 +24,7 @@ import (
 	"github.com/pingcap/tidb/pkg/errno"
 	"github.com/pingcap/tidb/pkg/expression"
 	"github.com/pingcap/tidb/pkg/parser/ast"
+	"github.com/pingcap/tidb/pkg/parser/charset"
 	"github.com/pingcap/tidb/pkg/parser/mysql"
 	"github.com/pingcap/tidb/pkg/sessionctx"
 	"github.com/pingcap/tidb/pkg/types"
@@ -694,9 +695,15 @@ func (r *builder) newBuildFromPatternLike(expr *expression.ScalarFunction, prefi
 			break
 		} else if pattern[i] == '_' {
 			// Get the prefix, but exclude the prefix.
-			// e.g., "abc_x", the start point exclude "abc",
-			// because the string length is more than 3.
-			exclude = true
+			// e.g., "abc_x", the start point excludes "abc" because the string length is more than 3.
+			//
+			// However, like the similar check in (*conditionChecker).checkLikeFunc(), in tidb's implementation, for
+			// PAD SPACE collations, the trailing spaces are removed in the index key. So we are unable to distinguish
+			// 'xxx' from 'xxx   ' by a single index range scan. If we exclude the start point for PAD SPACE collation,
+			// we will actually miss 'xxx   ', which will cause wrong results.
+			if !isPadSpaceCollation(collation) {
+				exclude = true
+			}
 			isExactMatch = false
 			break
 		}
@@ -736,6 +743,14 @@ func (r *builder) newBuildFromPatternLike(expr *expression.ScalarFunction, prefi
 	return []*point{startPoint, endPoint}
 }
 
+// isPadSpaceCollation returns whether the collation is a PAD SPACE collation.
+// Since all collations, except for binary, implemented in tidb are PAD SPACE collations for now, we use a simple
+// collation != binary check here. We may also move it to collation related packages when NO PAD collations are
+// implemented in the future.
+func isPadSpaceCollation(collation string) bool {
+	return collation != charset.CollationBin
+}
+
 func (r *builder) buildFromNot(expr *expression.ScalarFunction, prefixLen int) []*point {
 	switch n := expr.FuncName.L; n {
 	case ast.IsTruthWithoutNull:

diff --git a/pkg/util/ranger/ranger_test.go b/pkg/util/ranger/ranger_test.go
@@ -1114,7 +1114,7 @@ create table t(
 			exprStr:     "a LIKE 'abc_'",
 			accessConds: "[like(test.t.a, abc_, 92)]",
 			filterConds: "[like(test.t.a, abc_, 92)]",
-			resultStr:   "[(\"abc\",\"abd\")]",
+			resultStr:   "[[\"abc\",\"abd\")]",
 		},
 		{
 			indexPos:    0,

diff --git a/tests/integrationtest/r/planner/core/issuetest/planner_issue.result b/tests/integrationtest/r/planner/core/issuetest/planner_issue.result
@@ -260,7 +260,7 @@ a
 set @@tidb_max_chunk_size = default;
 drop table if exists t1, t2;
 create table t1(a varchar(20) collate utf8mb4_bin, index ia(a));
-insert into t1 value('测试'),('测试  ');
+insert into t1 value('测试'),('测试  '),('xxx ');
 explain format = brief select *,length(a) from t1 where a like '测试 %';
 id	estRows	task	access object	operator info
 Projection	250.00	root		planner__core__issuetest__planner_issue.t1.a, length(planner__core__issuetest__planner_issue.t1.a)->Column#3
@@ -281,6 +281,16 @@ a	length(a)
 select *,length(a) from t1 where a like '测试';
 a	length(a)
 测试	6
+explain format = brief select * from t1 use index (ia) where a like 'xxx_';
+id	estRows	task	access object	operator info
+Projection	250.00	root		planner__core__issuetest__planner_issue.t1.a
+└─UnionScan	250.00	root		like(planner__core__issuetest__planner_issue.t1.a, "xxx_", 92)
+  └─IndexReader	250.00	root		index:Selection
+    └─Selection	250.00	cop[tikv]		like(planner__core__issuetest__planner_issue.t1.a, "xxx_", 92)
+      └─IndexRangeScan	250.00	cop[tikv]	table:t1, index:ia(a)	range:["xxx","xxy"), keep order:false, stats:pseudo
+select * from t1 use index (ia) where a like 'xxx_';
+a
+xxx 
 create table t2(a varchar(20) collate gbk_chinese_ci, index ia(a));
 insert into t2 value('测试'),('测试  ');
 explain format = brief select *,length(a) from t2 where a like '测试 %';

diff --git a/tests/integrationtest/t/planner/core/issuetest/planner_issue.test b/tests/integrationtest/t/planner/core/issuetest/planner_issue.test
@@ -153,13 +153,16 @@ select a from (select 100 as a, 100 as b union all select * from t) t where b !=
 set @@tidb_max_chunk_size = default;
 
 # https://github.com/pingcap/tidb/issues/48821
+# https://github.com/pingcap/tidb/issues/48983
 drop table if exists t1, t2;
 create table t1(a varchar(20) collate utf8mb4_bin, index ia(a));
-insert into t1 value('测试'),('测试  ');
+insert into t1 value('测试'),('测试  '),('xxx ');
 explain format = brief select *,length(a) from t1 where a like '测试 %';
 explain format = brief select *,length(a) from t1 where a like '测试';
 select *,length(a) from t1 where a like '测试 %';
 select *,length(a) from t1 where a like '测试';
+explain format = brief select * from t1 use index (ia) where a like 'xxx_';
+select * from t1 use index (ia) where a like 'xxx_';
 create table t2(a varchar(20) collate gbk_chinese_ci, index ia(a));
 insert into t2 value('测试'),('测试  ');
 explain format = brief select *,length(a) from t2 where a like '测试 %';