From c949cd4e9af84aafc0b14c9bb1afa682441147d2 Mon Sep 17 00:00:00 2001 From: wjHuang Date: Wed, 24 Apr 2024 00:45:11 +0800 Subject: [PATCH] expression: don't cast collation for in expression if the new collation is disabled (#52812) close pingcap/tidb#52772 --- pkg/planner/core/expression_rewriter.go | 7 +++++++ .../r/collation_misc_disabled.result | 13 +++++++++++++ .../r/collation_misc_enabled.result | 14 ++++++++++++++ tests/integrationtest/t/collation_misc.test | 7 +++++++ 4 files changed, 41 insertions(+) diff --git a/pkg/planner/core/expression_rewriter.go b/pkg/planner/core/expression_rewriter.go index b249edd19eccb..f9ceb37f590fe 100644 --- a/pkg/planner/core/expression_rewriter.go +++ b/pkg/planner/core/expression_rewriter.go @@ -1751,6 +1751,13 @@ func (er *expressionRewriter) castCollationForIn(colLen int, elemCnt int, stkLen if colLen != 1 { return } + if !collate.NewCollationEnabled() { + // See https://github.com/pingcap/tidb/issues/52772 + // This function will apply CoercibilityExplicit to the cast expression, but some checks (during ColumnSubstituteImpl) are missed when the new + // collation is disabled, which then leads to a panic. + // To work around this issue, we can skip the function, which should be fine since the new collation is disabled. 
+ return + } for i := stkLen - elemCnt; i < stkLen; i++ { // todo: consider refining the code and reusing expression.BuildCollationFunction here if er.ctxStack[i].GetType().EvalType() == types.ETString { diff --git a/tests/integrationtest/r/collation_misc_disabled.result b/tests/integrationtest/r/collation_misc_disabled.result index 509d743bb4e23..835c97ef0ba99 100644 --- a/tests/integrationtest/r/collation_misc_disabled.result +++ b/tests/integrationtest/r/collation_misc_disabled.result @@ -125,3 +125,16 @@ insert into t values ("1", "a ", "a"); select /*+USE_INDEX(t, idx)*/ * from t; id a b 1 a a +drop table if exists t1; +drop table if exists t2; +create table t1(code varchar(32)) CHARSET=utf8 COLLATE=utf8_general_ci; +create table t2(code varchar(32)) CHARSET=utf8 COLLATE=utf8_bin; +desc format=brief select * from t1 join t2 on t1.code=t2.code and t1.code in ('1') and t2.code in ('1'); +id estRows task access object operator info +HashJoin 12.50 root inner join, equal:[eq(cd_test_utf8mb4_0900_bin.t1.code, cd_test_utf8mb4_0900_bin.t2.code)] +├─TableReader(Build) 10.00 root data:Selection +│ └─Selection 10.00 cop[tikv] eq(cd_test_utf8mb4_0900_bin.t2.code, "1"), not(isnull(cd_test_utf8mb4_0900_bin.t2.code)) +│ └─TableFullScan 10000.00 cop[tikv] table:t2 keep order:false, stats:pseudo +└─TableReader(Probe) 10.00 root data:Selection + └─Selection 10.00 cop[tikv] eq(cd_test_utf8mb4_0900_bin.t1.code, "1"), not(isnull(cd_test_utf8mb4_0900_bin.t1.code)) + └─TableFullScan 10000.00 cop[tikv] table:t1 keep order:false, stats:pseudo diff --git a/tests/integrationtest/r/collation_misc_enabled.result b/tests/integrationtest/r/collation_misc_enabled.result index 8525751a48281..4a8d4c0eb08c7 100644 --- a/tests/integrationtest/r/collation_misc_enabled.result +++ b/tests/integrationtest/r/collation_misc_enabled.result @@ -142,3 +142,17 @@ insert into t values ("1", "a ", "a"); select /*+USE_INDEX(t, idx)*/ * from t; id a b 1 a a +drop table if exists t1; +drop table if exists t2; 
+create table t1(code varchar(32)) CHARSET=utf8 COLLATE=utf8_general_ci; +create table t2(code varchar(32)) CHARSET=utf8 COLLATE=utf8_bin; +desc format=brief select * from t1 join t2 on t1.code=t2.code and t1.code in ('1') and t2.code in ('1'); +id estRows task access object operator info +Projection 80000.00 root cd_test_utf8mb4_0900_bin.t1.code, cd_test_utf8mb4_0900_bin.t2.code +└─HashJoin 80000.00 root CARTESIAN inner join + ├─TableReader(Build) 10.00 root data:Selection + │ └─Selection 10.00 cop[tikv] eq(cd_test_utf8mb4_0900_bin.t2.code, "1") + │ └─TableFullScan 10000.00 cop[tikv] table:t2 keep order:false, stats:pseudo + └─TableReader(Probe) 8000.00 root data:Selection + └─Selection 8000.00 cop[tikv] eq(cd_test_utf8mb4_0900_bin.t1.code, "1") + └─TableFullScan 10000.00 cop[tikv] table:t1 keep order:false, stats:pseudo diff --git a/tests/integrationtest/t/collation_misc.test b/tests/integrationtest/t/collation_misc.test index 5549cd265da8c..914df6e9963a1 100644 --- a/tests/integrationtest/t/collation_misc.test +++ b/tests/integrationtest/t/collation_misc.test @@ -89,3 +89,10 @@ use cd_test_utf8mb4_0900_bin; create table t (id varchar(255) primary key clustered, a varchar(255) collate utf8mb4_0900_bin, b varchar(255) collate utf8mb4_bin, key idx(a, b)); insert into t values ("1", "a ", "a"); select /*+USE_INDEX(t, idx)*/ * from t; + +# issue 52772 +drop table if exists t1; +drop table if exists t2; +create table t1(code varchar(32)) CHARSET=utf8 COLLATE=utf8_general_ci; +create table t2(code varchar(32)) CHARSET=utf8 COLLATE=utf8_bin; +desc format=brief select * from t1 join t2 on t1.code=t2.code and t1.code in ('1') and t2.code in ('1');