From e8f85e18153679ca6a1bca8ec3fb4e2175319ca6 Mon Sep 17 00:00:00 2001
From: time-and-fate <25057648+time-and-fate@users.noreply.github.com>
Date: Tue, 28 Nov 2023 22:38:53 +0800
Subject: [PATCH 1/3] add

---
 pkg/util/ranger/checker.go                    |  5 +----
 pkg/util/ranger/points.go                     | 21 ++++++++++++++++---
 .../core/issuetest/planner_issue.result       | 12 ++++++++++-
 .../planner/core/issuetest/planner_issue.test |  5 ++++-
 4 files changed, 34 insertions(+), 9 deletions(-)

diff --git a/pkg/util/ranger/checker.go b/pkg/util/ranger/checker.go
index 2c7812b6cdb3f..d343d27aea397 100644
--- a/pkg/util/ranger/checker.go
+++ b/pkg/util/ranger/checker.go
@@ -17,7 +17,6 @@ package ranger
 import (
 	"github.com/pingcap/tidb/pkg/expression"
 	"github.com/pingcap/tidb/pkg/parser/ast"
-	"github.com/pingcap/tidb/pkg/parser/charset"
 	"github.com/pingcap/tidb/pkg/parser/mysql"
 	"github.com/pingcap/tidb/pkg/sessionctx"
 	"github.com/pingcap/tidb/pkg/types"
@@ -175,9 +174,7 @@ func (c *conditionChecker) checkLikeFunc(scalar *expression.ScalarFunction) (isA
 	// In tidb's implementation, for PAD SPACE collations, the trailing spaces are removed in the index key. So we are
 	// unable to distinguish 'xxx' from 'xxx   ' by a single index range scan, and we may read more data than needed by
 	// the `like` function. Therefore, a Selection is needed to filter the data.
-	// Since all collations, except for binary, implemented in tidb are PAD SPACE collations for now, we use a simple
-	// collation != binary check here.
-	if collation != charset.CollationBin {
+	if isPadSpaceCollation(collation) {
 		likeFuncReserve = true
 	}
 
diff --git a/pkg/util/ranger/points.go b/pkg/util/ranger/points.go
index a6b4b796fe561..439afedd68311 100644
--- a/pkg/util/ranger/points.go
+++ b/pkg/util/ranger/points.go
@@ -24,6 +24,7 @@ import (
 	"github.com/pingcap/tidb/pkg/errno"
 	"github.com/pingcap/tidb/pkg/expression"
 	"github.com/pingcap/tidb/pkg/parser/ast"
+	"github.com/pingcap/tidb/pkg/parser/charset"
 	"github.com/pingcap/tidb/pkg/parser/mysql"
 	"github.com/pingcap/tidb/pkg/sessionctx"
 	"github.com/pingcap/tidb/pkg/types"
@@ -694,9 +695,15 @@ func (r *builder) newBuildFromPatternLike(expr *expression.ScalarFunction, prefi
 			break
 		} else if pattern[i] == '_' {
 			// Get the prefix, but exclude the prefix.
-			// e.g., "abc_x", the start point exclude "abc",
-			// because the string length is more than 3.
-			exclude = true
+			// e.g., "abc_x", the start point excludes "abc" because the string length is more than 3.
+			//
+			// However, like the similar check in (*conditionChecker).checkLikeFunc(), in tidb's implementation, for
+			// PAD SPACE collations, the trailing spaces are removed in the index key. So we are unable to distinguish
+			// 'xxx' from 'xxx   ' by a single index range scan. If we exclude the start point for PAD SPACE collation,
+			// we will actually miss 'xxx   ', which will cause wrong results.
+			if !isPadSpaceCollation(collation) {
+				exclude = true
+			}
 			isExactMatch = false
 			break
 		}
@@ -736,6 +743,14 @@ func (r *builder) newBuildFromPatternLike(expr *expression.ScalarFunction, prefi
 	return []*point{startPoint, endPoint}
 }
 
+// isPadSpaceCollation returns whether the collation is a PAD SPACE collation.
+// Since all collations, except for binary, implemented in tidb are PAD SPACE collations for now, we use a simple
+// collation != binary check here. We may also move it to collation related packages when NO PAD collations are
+// implemented in the future.
+func isPadSpaceCollation(collation string) bool {
+	return collation != charset.CollationBin
+}
+
 func (r *builder) buildFromNot(expr *expression.ScalarFunction, prefixLen int) []*point {
 	switch n := expr.FuncName.L; n {
 	case ast.IsTruthWithoutNull:
diff --git a/tests/integrationtest/r/planner/core/issuetest/planner_issue.result b/tests/integrationtest/r/planner/core/issuetest/planner_issue.result
index 920b962398a76..5b5aae65d4adb 100644
--- a/tests/integrationtest/r/planner/core/issuetest/planner_issue.result
+++ b/tests/integrationtest/r/planner/core/issuetest/planner_issue.result
@@ -260,7 +260,7 @@ a
 set @@tidb_max_chunk_size = default;
 drop table if exists t1, t2;
 create table t1(a varchar(20) collate utf8mb4_bin, index ia(a));
-insert into t1 value('测试'),('测试  ');
+insert into t1 value('测试'),('测试  '),('xxx ');
 explain format = brief select *,length(a) from t1 where a like '测试 %';
 id	estRows	task	access object	operator info
 Projection	250.00	root		planner__core__issuetest__planner_issue.t1.a, length(planner__core__issuetest__planner_issue.t1.a)->Column#3
@@ -281,6 +281,16 @@ a	length(a)
 select *,length(a) from t1 where a like '测试';
 a	length(a)
 测试	6
+explain format = brief select * from t1 use index (ia) where a like 'xxx_';
+id	estRows	task	access object	operator info
+Projection	250.00	root		planner__core__issuetest__planner_issue.t1.a
+└─UnionScan	250.00	root		like(planner__core__issuetest__planner_issue.t1.a, "xxx_", 92)
+  └─IndexReader	250.00	root		index:Selection
+    └─Selection	250.00	cop[tikv]		like(planner__core__issuetest__planner_issue.t1.a, "xxx_", 92)
+      └─IndexRangeScan	250.00	cop[tikv]	table:t1, index:ia(a)	range:["xxx","xxy"), keep order:false, stats:pseudo
+select * from t1 use index (ia) where a like 'xxx_';
+a
+xxx 
 create table t2(a varchar(20) collate gbk_chinese_ci, index ia(a));
 insert into t2 value('测试'),('测试  ');
 explain format = brief select *,length(a) from t2 where a like '测试 %';
diff --git a/tests/integrationtest/t/planner/core/issuetest/planner_issue.test b/tests/integrationtest/t/planner/core/issuetest/planner_issue.test
index 45d79ba30b5fa..bb04737801aeb 100644
--- a/tests/integrationtest/t/planner/core/issuetest/planner_issue.test
+++ b/tests/integrationtest/t/planner/core/issuetest/planner_issue.test
@@ -153,13 +153,16 @@ select a from (select 100 as a, 100 as b union all select * from t) t where b !=
 set @@tidb_max_chunk_size = default;
 
 # https://github.com/pingcap/tidb/issues/48821
+# https://github.com/pingcap/tidb/issues/48983
 drop table if exists t1, t2;
 create table t1(a varchar(20) collate utf8mb4_bin, index ia(a));
-insert into t1 value('测试'),('测试  ');
+insert into t1 value('测试'),('测试  '),('xxx ');
 explain format = brief select *,length(a) from t1 where a like '测试 %';
 explain format = brief select *,length(a) from t1 where a like '测试';
 select *,length(a) from t1 where a like '测试 %';
 select *,length(a) from t1 where a like '测试';
+explain format = brief select * from t1 use index (ia) where a like 'xxx_';
+select * from t1 use index (ia) where a like 'xxx_';
 create table t2(a varchar(20) collate gbk_chinese_ci, index ia(a));
 insert into t2 value('测试'),('测试  ');
 explain format = brief select *,length(a) from t2 where a like '测试 %';

From bb1f180c8d725fb395bd3e10497a54f2512fbe89 Mon Sep 17 00:00:00 2001
From: time-and-fate <25057648+time-and-fate@users.noreply.github.com>
Date: Tue, 28 Nov 2023 22:57:06 +0800
Subject: [PATCH 2/3] update test result

---
 .../casetest/physicalplantest/testdata/plan_suite_out.json    | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/pkg/planner/core/casetest/physicalplantest/testdata/plan_suite_out.json b/pkg/planner/core/casetest/physicalplantest/testdata/plan_suite_out.json
index 234706233bd4e..8a7fc8f06327e 100644
--- a/pkg/planner/core/casetest/physicalplantest/testdata/plan_suite_out.json
+++ b/pkg/planner/core/casetest/physicalplantest/testdata/plan_suite_out.json
@@ -2195,7 +2195,7 @@
       },
       {
         "SQL": "select a from t where c_str like 'abc_'",
-        "Best": "IndexReader(Index(t.c_d_e_str)[(\"abc\",\"abd\")]->Sel([like(test.t.c_str, abc_, 92)]))->Projection"
+        "Best": "IndexReader(Index(t.c_d_e_str)[[\"abc\",\"abd\")]->Sel([like(test.t.c_str, abc_, 92)]))->Projection"
       },
       {
         "SQL": "select a from t where c_str like 'abc%af'",
@@ -2223,7 +2223,7 @@
       },
       {
         "SQL": "select a from t where c_str like 'abc\\__'",
-        "Best": "IndexReader(Index(t.c_d_e_str)[(\"abc_\",\"abc`\")]->Sel([like(test.t.c_str, abc\\__, 92)]))->Projection"
+        "Best": "IndexReader(Index(t.c_d_e_str)[[\"abc_\",\"abc`\")]->Sel([like(test.t.c_str, abc\\__, 92)]))->Projection"
       },
       {
         "SQL": "select a from t where c_str like 123",

From 261466baba2400dac041a6a044a76f5d9eb7e681 Mon Sep 17 00:00:00 2001
From: time-and-fate <25057648+time-and-fate@users.noreply.github.com>
Date: Tue, 28 Nov 2023 23:06:48 +0800
Subject: [PATCH 3/3] update test result

---
 pkg/util/ranger/ranger_test.go | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pkg/util/ranger/ranger_test.go b/pkg/util/ranger/ranger_test.go
index aeb2b8a1dc31b..49c073ed9e570 100644
--- a/pkg/util/ranger/ranger_test.go
+++ b/pkg/util/ranger/ranger_test.go
@@ -1114,7 +1114,7 @@ create table t(
 			exprStr:     "a LIKE 'abc_'",
 			accessConds: "[like(test.t.a, abc_, 92)]",
 			filterConds: "[like(test.t.a, abc_, 92)]",
-			resultStr:   "[(\"abc\",\"abd\")]",
+			resultStr:   "[[\"abc\",\"abd\")]",
 		},
 		{
 			indexPos:    0,