From 22b8219d63263e214216f25482add09d0100f9b3 Mon Sep 17 00:00:00 2001 From: wjHuang Date: Wed, 24 Apr 2024 00:45:11 +0800 Subject: [PATCH 1/2] This is an automated cherry-pick of #52812 Signed-off-by: ti-chi-bot --- .../r/collation_misc_disabled.result | 23 ++++++++++++++++++ .../r/collation_misc_enabled.result | 24 +++++++++++++++++++ cmd/explaintest/t/collation_misc.test | 16 +++++++++++++ planner/core/expression_rewriter.go | 7 ++++++ 4 files changed, 70 insertions(+) diff --git a/cmd/explaintest/r/collation_misc_disabled.result b/cmd/explaintest/r/collation_misc_disabled.result index a66f63ead2db9..8419c9647df66 100644 --- a/cmd/explaintest/r/collation_misc_disabled.result +++ b/cmd/explaintest/r/collation_misc_disabled.result @@ -120,4 +120,27 @@ binary binary 63 Yes Yes 1 ascii_bin ascii 65 Yes Yes 1 utf8_bin utf8 83 Yes Yes 1 gbk_bin gbk 87 Yes Yes 1 +<<<<<<< HEAD:cmd/explaintest/r/collation_misc_disabled.result use test; +======= +create database if not exists cd_test_utf8mb4_0900_bin; +use cd_test_utf8mb4_0900_bin; +create table t (id varchar(255) primary key clustered, a varchar(255) collate utf8mb4_0900_bin, b varchar(255) collate utf8mb4_bin, key idx(a, b)); +insert into t values ("1", "a ", "a"); +select /*+USE_INDEX(t, idx)*/ * from t; +id a b +1 a a +drop table if exists t1; +drop table if exists t2; +create table t1(code varchar(32)) CHARSET=utf8 COLLATE=utf8_general_ci; +create table t2(code varchar(32)) CHARSET=utf8 COLLATE=utf8_bin; +desc format=brief select * from t1 join t2 on t1.code=t2.code and t1.code in ('1') and t2.code in ('1'); +id estRows task access object operator info +HashJoin 12.50 root inner join, equal:[eq(cd_test_utf8mb4_0900_bin.t1.code, cd_test_utf8mb4_0900_bin.t2.code)] +├─TableReader(Build) 10.00 root data:Selection +│ └─Selection 10.00 cop[tikv] eq(cd_test_utf8mb4_0900_bin.t2.code, "1"), not(isnull(cd_test_utf8mb4_0900_bin.t2.code)) +│ └─TableFullScan 10000.00 cop[tikv] table:t2 keep order:false, stats:pseudo 
+└─TableReader(Probe) 10.00 root data:Selection + └─Selection 10.00 cop[tikv] eq(cd_test_utf8mb4_0900_bin.t1.code, "1"), not(isnull(cd_test_utf8mb4_0900_bin.t1.code)) + └─TableFullScan 10000.00 cop[tikv] table:t1 keep order:false, stats:pseudo +>>>>>>> cf5c68e5558 (expression: don't cast collation for in expression is the new collation is disabled (#52812)):tests/integrationtest/r/collation_misc_disabled.result diff --git a/cmd/explaintest/r/collation_misc_enabled.result b/cmd/explaintest/r/collation_misc_enabled.result index a088ddb0b2c9d..9c541410a0506 100644 --- a/cmd/explaintest/r/collation_misc_enabled.result +++ b/cmd/explaintest/r/collation_misc_enabled.result @@ -133,4 +133,28 @@ utf8_unicode_ci utf8 192 Yes 1 utf8mb4_bin utf8mb4 46 Yes Yes 1 utf8mb4_general_ci utf8mb4 45 Yes 1 utf8mb4_unicode_ci utf8mb4 224 Yes 1 +<<<<<<< HEAD:cmd/explaintest/r/collation_misc_enabled.result use test; +======= +create database if not exists cd_test_utf8mb4_0900_bin; +use cd_test_utf8mb4_0900_bin; +create table t (id varchar(255) primary key clustered, a varchar(255) collate utf8mb4_0900_bin, b varchar(255) collate utf8mb4_bin, key idx(a, b)); +insert into t values ("1", "a ", "a"); +select /*+USE_INDEX(t, idx)*/ * from t; +id a b +1 a a +drop table if exists t1; +drop table if exists t2; +create table t1(code varchar(32)) CHARSET=utf8 COLLATE=utf8_general_ci; +create table t2(code varchar(32)) CHARSET=utf8 COLLATE=utf8_bin; +desc format=brief select * from t1 join t2 on t1.code=t2.code and t1.code in ('1') and t2.code in ('1'); +id estRows task access object operator info +Projection 80000.00 root cd_test_utf8mb4_0900_bin.t1.code, cd_test_utf8mb4_0900_bin.t2.code +└─HashJoin 80000.00 root CARTESIAN inner join + ├─TableReader(Build) 10.00 root data:Selection + │ └─Selection 10.00 cop[tikv] eq(cd_test_utf8mb4_0900_bin.t2.code, "1") + │ └─TableFullScan 10000.00 cop[tikv] table:t2 keep order:false, stats:pseudo + └─TableReader(Probe) 8000.00 root data:Selection + └─Selection 
8000.00 cop[tikv] eq(cd_test_utf8mb4_0900_bin.t1.code, "1") + └─TableFullScan 10000.00 cop[tikv] table:t1 keep order:false, stats:pseudo +>>>>>>> cf5c68e5558 (expression: don't cast collation for in expression is the new collation is disabled (#52812)):tests/integrationtest/r/collation_misc_enabled.result diff --git a/cmd/explaintest/t/collation_misc.test b/cmd/explaintest/t/collation_misc.test index 433cd2f7a9051..c82d6ad2fc87f 100644 --- a/cmd/explaintest/t/collation_misc.test +++ b/cmd/explaintest/t/collation_misc.test @@ -86,4 +86,20 @@ select * from information_schema.COLLATION_CHARACTER_SET_APPLICABILITY where COL show charset; show collation; +<<<<<<< HEAD:cmd/explaintest/t/collation_misc.test use test; +======= +# Issue46690 +create database if not exists cd_test_utf8mb4_0900_bin; +use cd_test_utf8mb4_0900_bin; +create table t (id varchar(255) primary key clustered, a varchar(255) collate utf8mb4_0900_bin, b varchar(255) collate utf8mb4_bin, key idx(a, b)); +insert into t values ("1", "a ", "a"); +select /*+USE_INDEX(t, idx)*/ * from t; + +# issue 52772 +drop table if exists t1; +drop table if exists t2; +create table t1(code varchar(32)) CHARSET=utf8 COLLATE=utf8_general_ci; +create table t2(code varchar(32)) CHARSET=utf8 COLLATE=utf8_bin; +desc format=brief select * from t1 join t2 on t1.code=t2.code and t1.code in ('1') and t2.code in ('1'); +>>>>>>> cf5c68e5558 (expression: don't cast collation for in expression is the new collation is disabled (#52812)):tests/integrationtest/t/collation_misc.test diff --git a/planner/core/expression_rewriter.go b/planner/core/expression_rewriter.go index 7ad98964b7daf..fc77ad403261d 100644 --- a/planner/core/expression_rewriter.go +++ b/planner/core/expression_rewriter.go @@ -1651,6 +1651,13 @@ func (er *expressionRewriter) castCollationForIn(colLen int, elemCnt int, stkLen if colLen != 1 { return } + if !collate.NewCollationEnabled() { + // See https://github.com/pingcap/tidb/issues/52772 + // This function will apply 
CoercibilityExplicit to the casted expression, but some checks (during ColumnSubstituteImpl) are missed when the new + // collation is disabled, which then leads to a panic. + // To work around this issue, we skip this function; that should be safe since the new collation is disabled. + return + } for i := stkLen - elemCnt; i < stkLen; i++ { // todo: consider refining the code and reusing expression.BuildCollationFunction here if er.ctxStack[i].GetType().EvalType() == types.ETString { From bb7085deddda7530b54ae3708c6c18c4014f87d8 Mon Sep 17 00:00:00 2001 From: wjhuang2016 Date: Wed, 24 Apr 2024 18:45:17 +0800 Subject: [PATCH 2/2] done Signed-off-by: wjhuang2016 --- .../r/collation_misc_disabled.result | 17 +++-------------- cmd/explaintest/r/collation_misc_enabled.result | 17 +++-------------- cmd/explaintest/t/collation_misc.test | 11 ----------- 3 files changed, 6 insertions(+), 39 deletions(-) diff --git a/cmd/explaintest/r/collation_misc_disabled.result b/cmd/explaintest/r/collation_misc_disabled.result index 8419c9647df66..a3e273c1a9b4e 100644 --- a/cmd/explaintest/r/collation_misc_disabled.result +++ b/cmd/explaintest/r/collation_misc_disabled.result @@ -120,27 +120,16 @@ binary binary 63 Yes Yes 1 ascii_bin ascii 65 Yes Yes 1 utf8_bin utf8 83 Yes Yes 1 gbk_bin gbk 87 Yes Yes 1 -<<<<<<< HEAD:cmd/explaintest/r/collation_misc_disabled.result -use test; -======= -create database if not exists cd_test_utf8mb4_0900_bin; -use cd_test_utf8mb4_0900_bin; -create table t (id varchar(255) primary key clustered, a varchar(255) collate utf8mb4_0900_bin, b varchar(255) collate utf8mb4_bin, key idx(a, b)); -insert into t values ("1", "a ", "a"); -select /*+USE_INDEX(t, idx)*/ * from t; -id a b -1 a a drop table if exists t1; drop table if exists t2; create table t1(code varchar(32)) CHARSET=utf8 COLLATE=utf8_general_ci; create table t2(code varchar(32)) CHARSET=utf8 COLLATE=utf8_bin; desc format=brief select * from t1 join t2 on t1.code=t2.code and t1.code in ('1') and t2.code in ('1'); 
id estRows task access object operator info -HashJoin 12.50 root inner join, equal:[eq(cd_test_utf8mb4_0900_bin.t1.code, cd_test_utf8mb4_0900_bin.t2.code)] +HashJoin 12.50 root inner join, equal:[eq(cd_test_latin1.t1.code, cd_test_latin1.t2.code)] ├─TableReader(Build) 10.00 root data:Selection -│ └─Selection 10.00 cop[tikv] eq(cd_test_utf8mb4_0900_bin.t2.code, "1"), not(isnull(cd_test_utf8mb4_0900_bin.t2.code)) +│ └─Selection 10.00 cop[tikv] eq(cd_test_latin1.t2.code, "1"), not(isnull(cd_test_latin1.t2.code)) │ └─TableFullScan 10000.00 cop[tikv] table:t2 keep order:false, stats:pseudo └─TableReader(Probe) 10.00 root data:Selection - └─Selection 10.00 cop[tikv] eq(cd_test_utf8mb4_0900_bin.t1.code, "1"), not(isnull(cd_test_utf8mb4_0900_bin.t1.code)) + └─Selection 10.00 cop[tikv] eq(cd_test_latin1.t1.code, "1"), not(isnull(cd_test_latin1.t1.code)) └─TableFullScan 10000.00 cop[tikv] table:t1 keep order:false, stats:pseudo ->>>>>>> cf5c68e5558 (expression: don't cast collation for in expression is the new collation is disabled (#52812)):tests/integrationtest/r/collation_misc_disabled.result diff --git a/cmd/explaintest/r/collation_misc_enabled.result b/cmd/explaintest/r/collation_misc_enabled.result index 9c541410a0506..38e9a8cc1d8c5 100644 --- a/cmd/explaintest/r/collation_misc_enabled.result +++ b/cmd/explaintest/r/collation_misc_enabled.result @@ -133,28 +133,17 @@ utf8_unicode_ci utf8 192 Yes 1 utf8mb4_bin utf8mb4 46 Yes Yes 1 utf8mb4_general_ci utf8mb4 45 Yes 1 utf8mb4_unicode_ci utf8mb4 224 Yes 1 -<<<<<<< HEAD:cmd/explaintest/r/collation_misc_enabled.result -use test; -======= -create database if not exists cd_test_utf8mb4_0900_bin; -use cd_test_utf8mb4_0900_bin; -create table t (id varchar(255) primary key clustered, a varchar(255) collate utf8mb4_0900_bin, b varchar(255) collate utf8mb4_bin, key idx(a, b)); -insert into t values ("1", "a ", "a"); -select /*+USE_INDEX(t, idx)*/ * from t; -id a b -1 a a drop table if exists t1; drop table if exists t2; create 
table t1(code varchar(32)) CHARSET=utf8 COLLATE=utf8_general_ci; create table t2(code varchar(32)) CHARSET=utf8 COLLATE=utf8_bin; desc format=brief select * from t1 join t2 on t1.code=t2.code and t1.code in ('1') and t2.code in ('1'); id estRows task access object operator info -Projection 80000.00 root cd_test_utf8mb4_0900_bin.t1.code, cd_test_utf8mb4_0900_bin.t2.code +Projection 80000.00 root cd_test_utf8.t1.code, cd_test_utf8.t2.code └─HashJoin 80000.00 root CARTESIAN inner join ├─TableReader(Build) 10.00 root data:Selection - │ └─Selection 10.00 cop[tikv] eq(cd_test_utf8mb4_0900_bin.t2.code, "1") + │ └─Selection 10.00 cop[tikv] eq(cd_test_utf8.t2.code, "1") │ └─TableFullScan 10000.00 cop[tikv] table:t2 keep order:false, stats:pseudo └─TableReader(Probe) 8000.00 root data:Selection - └─Selection 8000.00 cop[tikv] eq(cd_test_utf8mb4_0900_bin.t1.code, "1") + └─Selection 8000.00 cop[tikv] eq(cd_test_utf8.t1.code, "1") └─TableFullScan 10000.00 cop[tikv] table:t1 keep order:false, stats:pseudo ->>>>>>> cf5c68e5558 (expression: don't cast collation for in expression is the new collation is disabled (#52812)):tests/integrationtest/r/collation_misc_enabled.result diff --git a/cmd/explaintest/t/collation_misc.test b/cmd/explaintest/t/collation_misc.test index c82d6ad2fc87f..d789823c806e4 100644 --- a/cmd/explaintest/t/collation_misc.test +++ b/cmd/explaintest/t/collation_misc.test @@ -86,20 +86,9 @@ select * from information_schema.COLLATION_CHARACTER_SET_APPLICABILITY where COL show charset; show collation; -<<<<<<< HEAD:cmd/explaintest/t/collation_misc.test -use test; -======= -# Issue46690 -create database if not exists cd_test_utf8mb4_0900_bin; -use cd_test_utf8mb4_0900_bin; -create table t (id varchar(255) primary key clustered, a varchar(255) collate utf8mb4_0900_bin, b varchar(255) collate utf8mb4_bin, key idx(a, b)); -insert into t values ("1", "a ", "a"); -select /*+USE_INDEX(t, idx)*/ * from t; - # issue 52772 drop table if exists t1; drop table if exists t2; 
create table t1(code varchar(32)) CHARSET=utf8 COLLATE=utf8_general_ci; create table t2(code varchar(32)) CHARSET=utf8 COLLATE=utf8_bin; desc format=brief select * from t1 join t2 on t1.code=t2.code and t1.code in ('1') and t2.code in ('1'); ->>>>>>> cf5c68e5558 (expression: don't cast collation for in expression is the new collation is disabled (#52812)):tests/integrationtest/t/collation_misc.test