diff --git a/cmd/explaintest/r/explain_generate_column_substitute.result b/cmd/explaintest/r/explain_generate_column_substitute.result index 3cab89b08dfc5..fb209b6bcabd5 100644 --- a/cmd/explaintest/r/explain_generate_column_substitute.result +++ b/cmd/explaintest/r/explain_generate_column_substitute.result @@ -1,3 +1,4 @@ +set names utf8mb4; use test; drop table if exists t; create table t(a int, b real, c bigint as ((a+1)) virtual, e real as ((b+a))); diff --git a/cmd/explaintest/r/index_merge.result b/cmd/explaintest/r/index_merge.result index ddce511db13c8..014aac58825b5 100644 --- a/cmd/explaintest/r/index_merge.result +++ b/cmd/explaintest/r/index_merge.result @@ -322,7 +322,7 @@ drop view if exists v2; create view v2 as select /*+ use_index_merge(t1) */ * from t1 where c1 < 10 or c2 < 10 and c3 < 10; show create view v2; View Create View character_set_client collation_connection -v2 CREATE ALGORITHM=UNDEFINED DEFINER=`root`@`%` SQL SECURITY DEFINER VIEW `v2` (`c1`, `c2`, `c3`) AS SELECT /*+ USE_INDEX_MERGE(`t1` )*/ `test`.`t1`.`c1` AS `c1`,`test`.`t1`.`c2` AS `c2`,`test`.`t1`.`c3` AS `c3` FROM `test`.`t1` WHERE `c1`<10 OR `c2`<10 AND `c3`<10 utf8mb4 utf8mb4_general_ci +v2 CREATE ALGORITHM=UNDEFINED DEFINER=`root`@`%` SQL SECURITY DEFINER VIEW `v2` (`c1`, `c2`, `c3`) AS SELECT /*+ USE_INDEX_MERGE(`t1` )*/ `test`.`t1`.`c1` AS `c1`,`test`.`t1`.`c2` AS `c2`,`test`.`t1`.`c3` AS `c3` FROM `test`.`t1` WHERE `c1`<10 OR `c2`<10 AND `c3`<10 utf8mb4 utf8mb4_bin select * from v2 order by 1; c1 c2 c3 1 1 1 diff --git a/cmd/explaintest/t/explain_generate_column_substitute.test b/cmd/explaintest/t/explain_generate_column_substitute.test index 4e47ce34607ae..f71b05c59f5c7 100644 --- a/cmd/explaintest/t/explain_generate_column_substitute.test +++ b/cmd/explaintest/t/explain_generate_column_substitute.test @@ -1,3 +1,4 @@ +set names utf8mb4; use test; drop table if exists t; create table t(a int, b real, c bigint as ((a+1)) virtual, e real as ((b+a))); diff --git 
a/expression/integration_serial_test.go b/expression/integration_serial_test.go index d43a5342d8b23..01013e56a0283 100644 --- a/expression/integration_serial_test.go +++ b/expression/integration_serial_test.go @@ -96,6 +96,29 @@ func TestIssue17891(t *testing.T) { tk.MustExec("create table test(id int, value set ('a','b','c') charset utf8mb4 collate utf8mb4_general_ci default 'a,B ,C');") } +func TestIssue31174(t *testing.T) { + collate.SetNewCollationEnabledForTest(true) + defer collate.SetNewCollationEnabledForTest(false) + + store, clean := testkit.CreateMockStore(t) + defer clean() + + tk := testkit.NewTestKit(t, store) + tk.MustExec("use test") + tk.MustExec("drop table if exists t") + tk.MustExec("create table t(a char(4) collate utf8_general_ci primary key /*T![clustered_index] clustered */);") + tk.MustExec("insert into t values('`?');") + // The 'like' condition cannot be used to construct the range. + tk.HasPlan("select * from t where a like '`%';", "TableFullScan") + tk.MustQuery("select * from t where a like '`%';").Check(testkit.Rows("`?")) + + tk.MustExec("drop table if exists t") + tk.MustExec("create table t(a char(4) collate binary primary key /*T![clustered_index] clustered */);") + tk.MustExec("insert into t values('`?');") + tk.HasPlan("select * from t where a like '`%';", "TableRangeScan") + tk.MustQuery("select * from t where a like '`%';").Check(testkit.Rows("`?\x00\x00")) +} + func TestIssue20268(t *testing.T) { collate.SetNewCollationEnabledForTest(true) defer collate.SetNewCollationEnabledForTest(false) diff --git a/util/collate/collate.go b/util/collate/collate.go index f74bacb96130f..1b31abcb62ffb 100644 --- a/util/collate/collate.go +++ b/util/collate/collate.go @@ -331,10 +331,13 @@ func IsCICollation(collate string) bool { collate == "utf8_unicode_ci" || collate == "utf8mb4_unicode_ci" } -// IsBinCollation returns if the collation is 'xx_bin'. +// IsBinCollation reports whether the collation is 'xx_bin' or 'binary'. 
+// The function is used to determine whether the sort key of a string under the collation is equal to the string itself, +// which holds for both the xx_bin collations and charset.CollationBin. func IsBinCollation(collate string) bool { return collate == charset.CollationASCII || collate == charset.CollationLatin1 || - collate == charset.CollationUTF8 || collate == charset.CollationUTF8MB4 + collate == charset.CollationUTF8 || collate == charset.CollationUTF8MB4 || + collate == charset.CollationBin } // CollationToProto converts collation from string to int32(used by protocol). diff --git a/util/ranger/checker.go b/util/ranger/checker.go index 8431bc05abef6..3f667b5a8733e 100644 --- a/util/ranger/checker.go +++ b/util/ranger/checker.go @@ -112,6 +112,16 @@ func (c *conditionChecker) checkScalarFunction(scalar *expression.ScalarFunction func (c *conditionChecker) checkLikeFunc(scalar *expression.ScalarFunction) bool { _, collation := scalar.CharsetAndCollation() + if collate.NewCollationEnabled() && !collate.IsBinCollation(collation) { + // The algorithm constructs the range at the byte level: for example, ab% is mapped to [ab, ac] by adding 1 to the last byte. + // However, this is incorrect for strings with a non-binary collation because the sort key order is not the same as the byte order. + // For example, "`%" is mapped to the range [`, a] (where ` is 0x60 and a is 0x61). + // Because the collation utf8_general_ci is case-insensitive, a and A have the same sort key. + // As a result, the range becomes [`, A], which is actually an empty range. + // See https://github.com/pingcap/tidb/issues/31174 for more details. + // In short, for strings with a non-binary collation, we cannot use `like` expressions to generate the range. + return false + } if !collate.CompatibleCollate(scalar.GetArgs()[0].GetType().Collate, collation) { return false }