From 29d571d6b5d177380839433c505d08fb45a68417 Mon Sep 17 00:00:00 2001 From: wjhuang2016 Date: Mon, 22 Apr 2024 17:25:05 +0800 Subject: [PATCH 1/5] done Signed-off-by: wjhuang2016 --- pkg/expression/collation.go | 13 +++++++++++++ .../r/collation_misc_disabled.result | 13 +++++++++++++ .../r/collation_misc_enabled.result | 14 ++++++++++++++ tests/integrationtest/t/collation_misc.test | 7 +++++++ 4 files changed, 47 insertions(+) diff --git a/pkg/expression/collation.go b/pkg/expression/collation.go index b9a02e7223adb..db20cb1217ffe 100644 --- a/pkg/expression/collation.go +++ b/pkg/expression/collation.go @@ -302,6 +302,19 @@ func deriveCollation(ctx BuildContext, funcName string, args []Expression, retTy // CheckAndDeriveCollationFromExprs derives collation information from these expressions, return error if derives collation error. func CheckAndDeriveCollationFromExprs(ctx BuildContext, funcName string, evalType types.EvalType, args ...Expression) (et *ExprCollation, err error) { + defer func() { + if err != nil && !collate.NewCollationEnabled() { + // args[0] can't be nil here. + // Use the collation of the first argument instead of returning an error. + et = &ExprCollation{ + Coer: args[0].Coercibility(), + Repe: args[0].Repertoire(), + Charset: args[0].GetType().GetCharset(), + Collation: args[0].GetType().GetCollate(), + } + err = nil + } + }() ec := inferCollation(args...) if ec == nil { return nil, illegalMixCollationErr(funcName, args) diff --git a/tests/integrationtest/r/collation_misc_disabled.result b/tests/integrationtest/r/collation_misc_disabled.result index 035de1187efa1..843545cfa5129 100644 --- a/tests/integrationtest/r/collation_misc_disabled.result +++ b/tests/integrationtest/r/collation_misc_disabled.result @@ -122,3 +122,16 @@ insert into t values ("1", "a ", "a"); select /*+USE_INDEX(t, idx)*/ * from t; id a b 1 a a +drop table if exists t1; +drop table if exists t2; +create table t1(code varchar(32)) CHARSET=utf8 COLLATE=utf8_general_ci; +create table t2(code varchar(32)) CHARSET=utf8 COLLATE=utf8_bin; +desc format=brief select * from t1 join t2 on t1.code=t2.code and t1.code in ('1') and t2.code in ('1'); +id estRows task access object operator info +HashJoin 100.00 root CARTESIAN inner join +├─TableReader(Build) 10.00 root data:Selection +│ └─Selection 10.00 cop[tikv] eq(cd_test_utf8mb4_0900_bin.t2.code, "1") +│ └─TableFullScan 10000.00 cop[tikv] table:t2 keep order:false, stats:pseudo +└─TableReader(Probe) 10.00 root data:Selection + └─Selection 10.00 cop[tikv] eq(cd_test_utf8mb4_0900_bin.t1.code, "1") + └─TableFullScan 10000.00 cop[tikv] table:t1 keep order:false, stats:pseudo diff --git a/tests/integrationtest/r/collation_misc_enabled.result b/tests/integrationtest/r/collation_misc_enabled.result index bee35b7f1bd7d..9fb62c9530b58 100644 --- a/tests/integrationtest/r/collation_misc_enabled.result +++ b/tests/integrationtest/r/collation_misc_enabled.result @@ -136,3 +136,17 @@ insert into t values ("1", "a ", "a"); select /*+USE_INDEX(t, idx)*/ * from t; id a b 1 a a +drop table if exists t1; +drop table if exists t2; +create table t1(code varchar(32)) CHARSET=utf8 COLLATE=utf8_general_ci; +create table t2(code varchar(32)) CHARSET=utf8 COLLATE=utf8_bin; +desc format=brief select * from t1 join t2 on t1.code=t2.code and t1.code in ('1') and t2.code in ('1'); +id estRows task access object operator info +Projection 80000.00 root cd_test_utf8mb4_0900_bin.t1.code, cd_test_utf8mb4_0900_bin.t2.code +└─HashJoin 80000.00 root CARTESIAN inner join + ├─TableReader(Build) 10.00 root data:Selection + │ └─Selection 10.00 cop[tikv] eq(cd_test_utf8mb4_0900_bin.t2.code, "1") + │ └─TableFullScan 10000.00 cop[tikv] table:t2 keep order:false, stats:pseudo + └─TableReader(Probe) 8000.00 root data:Selection + └─Selection 8000.00 cop[tikv] eq(cd_test_utf8mb4_0900_bin.t1.code, "1") + └─TableFullScan 10000.00 cop[tikv] table:t1 keep order:false, stats:pseudo diff --git a/tests/integrationtest/t/collation_misc.test b/tests/integrationtest/t/collation_misc.test index 85c55049d0bca..058d94e008924 100644 --- a/tests/integrationtest/t/collation_misc.test +++ b/tests/integrationtest/t/collation_misc.test @@ -89,3 +89,10 @@ use cd_test_utf8mb4_0900_bin; create table t (id varchar(255) primary key clustered, a varchar(255) collate utf8mb4_0900_bin, b varchar(255) collate utf8mb4_bin, key idx(a, b)); insert into t values ("1", "a ", "a"); select /*+USE_INDEX(t, idx)*/ * from t; + +# issue 52772 +drop table if exists t1; +drop table if exists t2; +create table t1(code varchar(32)) CHARSET=utf8 COLLATE=utf8_general_ci; +create table t2(code varchar(32)) CHARSET=utf8 COLLATE=utf8_bin; +desc format=brief select * from t1 join t2 on t1.code=t2.code and t1.code in ('1') and t2.code in ('1'); From 48876a6c26a720b8dfd0af5f355f87a7bd7f65c1 Mon Sep 17 00:00:00 2001 From: wjhuang2016 Date: Mon, 22 Apr 2024 20:27:45 +0800 Subject: [PATCH 2/5] try Signed-off-by: wjhuang2016 --- pkg/expression/collation.go | 26 +++++++++++++------------- pkg/expression/util.go | 2 ++ 2 files changed, 15 insertions(+), 13 deletions(-) diff --git a/pkg/expression/collation.go b/pkg/expression/collation.go index db20cb1217ffe..8ff792e9a674a 100644 --- a/pkg/expression/collation.go +++ b/pkg/expression/collation.go @@ -302,19 +302,19 @@ func deriveCollation(ctx BuildContext, funcName string, args []Expression, retTy // CheckAndDeriveCollationFromExprs derives collation information from these expressions, return error if derives collation error. func CheckAndDeriveCollationFromExprs(ctx BuildContext, funcName string, evalType types.EvalType, args ...Expression) (et *ExprCollation, err error) { - defer func() { - if err != nil && !collate.NewCollationEnabled() { - // args[0] can't be nil here. - // Use the collation of the first argument instead of returning an error. - et = &ExprCollation{ - Coer: args[0].Coercibility(), - Repe: args[0].Repertoire(), - Charset: args[0].GetType().GetCharset(), - Collation: args[0].GetType().GetCollate(), - } - err = nil - } - }() + //defer func() { + // if err != nil && !collate.NewCollationEnabled() { + // // args[0] can't be nil here. + // // Use the collation of the first argument instead of returning an error. + // et = &ExprCollation{ + // Coer: args[0].Coercibility(), + // Repe: args[0].Repertoire(), + // Charset: args[0].GetType().GetCharset(), + // Collation: args[0].GetType().GetCollate(), + // } + // err = nil + // } + //}() ec := inferCollation(args...) if ec == nil { return nil, illegalMixCollationErr(funcName, args) diff --git a/pkg/expression/util.go b/pkg/expression/util.go index d966b90b89166..2ec79a806b84d 100644 --- a/pkg/expression/util.go +++ b/pkg/expression/util.go @@ -441,6 +441,8 @@ func ColumnSubstituteImpl(ctx BuildContext, expr Expression, schema *Schema, new if v.InOperand { newExpr = SetExprColumnInOperand(newExpr) } + newExpr = newExpr.Clone() + newExpr.SetCoercibility(v.Coercibility()) return true, false, newExpr case *ScalarFunction: substituted := false From 7ee6c0b7281f3ad7dd6b1ad2e8f49102072c453b Mon Sep 17 00:00:00 2001 From: wjhuang2016 Date: Mon, 22 Apr 2024 21:01:33 +0800 Subject: [PATCH 3/5] try Signed-off-by: wjhuang2016 --- .../r/collation_misc_enabled.result | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/tests/integrationtest/r/collation_misc_enabled.result b/tests/integrationtest/r/collation_misc_enabled.result index 9fb62c9530b58..69e4413f0818d 100644 --- a/tests/integrationtest/r/collation_misc_enabled.result +++ b/tests/integrationtest/r/collation_misc_enabled.result @@ -142,11 +142,10 @@ create table t1(code varchar(32)) CHARSET=utf8 COLLATE=utf8_general_ci; create table t2(code varchar(32)) CHARSET=utf8 COLLATE=utf8_bin; desc format=brief select * from t1 join t2 on t1.code=t2.code and t1.code in ('1') and t2.code in ('1'); id estRows task access object operator info -Projection 80000.00 root cd_test_utf8mb4_0900_bin.t1.code, cd_test_utf8mb4_0900_bin.t2.code -└─HashJoin 80000.00 root CARTESIAN inner join - ├─TableReader(Build) 10.00 root data:Selection - │ └─Selection 10.00 cop[tikv] eq(cd_test_utf8mb4_0900_bin.t2.code, "1") - │ └─TableFullScan 10000.00 cop[tikv] table:t2 keep order:false, stats:pseudo - └─TableReader(Probe) 8000.00 root data:Selection - └─Selection 8000.00 cop[tikv] eq(cd_test_utf8mb4_0900_bin.t1.code, "1") - └─TableFullScan 10000.00 cop[tikv] table:t1 keep order:false, stats:pseudo +HashJoin 100.00 root CARTESIAN inner join +├─TableReader(Build) 10.00 root data:Selection +│ └─Selection 10.00 cop[tikv] eq(cd_test_utf8mb4_0900_bin.t2.code, "1") +│ └─TableFullScan 10000.00 cop[tikv] table:t2 keep order:false, stats:pseudo +└─TableReader(Probe) 10.00 root data:Selection + └─Selection 10.00 cop[tikv] eq(cd_test_utf8mb4_0900_bin.t1.code, "1") + └─TableFullScan 10000.00 cop[tikv] table:t1 keep order:false, stats:pseudo From 0d7d4901bc1ba292b1d5435686ae5b288becd761 Mon Sep 17 00:00:00 2001 From: wjhuang2016 Date: Tue, 23 Apr 2024 10:48:54 +0800 Subject: [PATCH 4/5] fix Signed-off-by: wjhuang2016 --- pkg/expression/collation.go | 13 ------------- pkg/expression/util.go | 2 -- pkg/planner/core/expression_rewriter.go | 3 +++ .../r/collation_misc_disabled.result | 6 +++--- .../r/collation_misc_enabled.result | 15 ++++++++------- 5 files changed, 14 insertions(+), 25 deletions(-) diff --git a/pkg/expression/collation.go b/pkg/expression/collation.go index 8ff792e9a674a..b9a02e7223adb 100644 --- a/pkg/expression/collation.go +++ b/pkg/expression/collation.go @@ -302,19 +302,6 @@ func deriveCollation(ctx BuildContext, funcName string, args []Expression, retTy // CheckAndDeriveCollationFromExprs derives collation information from these expressions, return error if derives collation error. func CheckAndDeriveCollationFromExprs(ctx BuildContext, funcName string, evalType types.EvalType, args ...Expression) (et *ExprCollation, err error) { - //defer func() { - // if err != nil && !collate.NewCollationEnabled() { - // // args[0] can't be nil here. - // // Use the collation of the first argument instead of returning an error. - // et = &ExprCollation{ - // Coer: args[0].Coercibility(), - // Repe: args[0].Repertoire(), - // Charset: args[0].GetType().GetCharset(), - // Collation: args[0].GetType().GetCollate(), - // } - // err = nil - // } - //}() ec := inferCollation(args...) if ec == nil { return nil, illegalMixCollationErr(funcName, args) diff --git a/pkg/expression/util.go b/pkg/expression/util.go index 2ec79a806b84d..d966b90b89166 100644 --- a/pkg/expression/util.go +++ b/pkg/expression/util.go @@ -441,8 +441,6 @@ func ColumnSubstituteImpl(ctx BuildContext, expr Expression, schema *Schema, new if v.InOperand { newExpr = SetExprColumnInOperand(newExpr) } - newExpr = newExpr.Clone() - newExpr.SetCoercibility(v.Coercibility()) return true, false, newExpr case *ScalarFunction: substituted := false diff --git a/pkg/planner/core/expression_rewriter.go b/pkg/planner/core/expression_rewriter.go index 4785077488d97..5934500261b8d 100644 --- a/pkg/planner/core/expression_rewriter.go +++ b/pkg/planner/core/expression_rewriter.go @@ -1951,6 +1951,9 @@ func (er *expressionRewriter) castCollationForIn(colLen int, elemCnt int, stkLen if colLen != 1 { return } + if !collate.NewCollationEnabled() { + return + } for i := stkLen - elemCnt; i < stkLen; i++ { // todo: consider refining the code and reusing expression.BuildCollationFunction here if er.ctxStack[i].GetType().EvalType() == types.ETString { diff --git a/tests/integrationtest/r/collation_misc_disabled.result b/tests/integrationtest/r/collation_misc_disabled.result index 843545cfa5129..0771ac51fcdfd 100644 --- a/tests/integrationtest/r/collation_misc_disabled.result +++ b/tests/integrationtest/r/collation_misc_disabled.result @@ -128,10 +128,10 @@ create table t1(code varchar(32)) CHARSET=utf8 COLLATE=utf8_general_ci; create table t2(code varchar(32)) CHARSET=utf8 COLLATE=utf8_bin; desc format=brief select * from t1 join t2 on t1.code=t2.code and t1.code in ('1') and t2.code in ('1'); id estRows task access object operator info -HashJoin 100.00 root CARTESIAN inner join +HashJoin 12.50 root inner join, equal:[eq(cd_test_utf8mb4_0900_bin.t1.code, cd_test_utf8mb4_0900_bin.t2.code)] ├─TableReader(Build) 10.00 root data:Selection -│ └─Selection 10.00 cop[tikv] eq(cd_test_utf8mb4_0900_bin.t2.code, "1") +│ └─Selection 10.00 cop[tikv] eq(cd_test_utf8mb4_0900_bin.t2.code, "1"), not(isnull(cd_test_utf8mb4_0900_bin.t2.code)) │ └─TableFullScan 10000.00 cop[tikv] table:t2 keep order:false, stats:pseudo └─TableReader(Probe) 10.00 root data:Selection - └─Selection 10.00 cop[tikv] eq(cd_test_utf8mb4_0900_bin.t1.code, "1") + └─Selection 10.00 cop[tikv] eq(cd_test_utf8mb4_0900_bin.t1.code, "1"), not(isnull(cd_test_utf8mb4_0900_bin.t1.code)) └─TableFullScan 10000.00 cop[tikv] table:t1 keep order:false, stats:pseudo diff --git a/tests/integrationtest/r/collation_misc_enabled.result b/tests/integrationtest/r/collation_misc_enabled.result index 69e4413f0818d..9fb62c9530b58 100644 --- a/tests/integrationtest/r/collation_misc_enabled.result +++ b/tests/integrationtest/r/collation_misc_enabled.result @@ -142,10 +142,11 @@ create table t1(code varchar(32)) CHARSET=utf8 COLLATE=utf8_general_ci; create table t2(code varchar(32)) CHARSET=utf8 COLLATE=utf8_bin; desc format=brief select * from t1 join t2 on t1.code=t2.code and t1.code in ('1') and t2.code in ('1'); id estRows task access object operator info -HashJoin 100.00 root CARTESIAN inner join -├─TableReader(Build) 10.00 root data:Selection -│ └─Selection 10.00 cop[tikv] eq(cd_test_utf8mb4_0900_bin.t2.code, "1") -│ └─TableFullScan 10000.00 cop[tikv] table:t2 keep order:false, stats:pseudo -└─TableReader(Probe) 10.00 root data:Selection - └─Selection 10.00 cop[tikv] eq(cd_test_utf8mb4_0900_bin.t1.code, "1") - └─TableFullScan 10000.00 cop[tikv] table:t1 keep order:false, stats:pseudo +Projection 80000.00 root cd_test_utf8mb4_0900_bin.t1.code, cd_test_utf8mb4_0900_bin.t2.code +└─HashJoin 80000.00 root CARTESIAN inner join + ├─TableReader(Build) 10.00 root data:Selection + │ └─Selection 10.00 cop[tikv] eq(cd_test_utf8mb4_0900_bin.t2.code, "1") + │ └─TableFullScan 10000.00 cop[tikv] table:t2 keep order:false, stats:pseudo + └─TableReader(Probe) 8000.00 root data:Selection + └─Selection 8000.00 cop[tikv] eq(cd_test_utf8mb4_0900_bin.t1.code, "1") + └─TableFullScan 10000.00 cop[tikv] table:t1 keep order:false, stats:pseudo From db923bfc35cfabbd25bd0b069fa8bb189e4a421a Mon Sep 17 00:00:00 2001 From: wjhuang2016 Date: Tue, 23 Apr 2024 13:10:13 +0800 Subject: [PATCH 5/5] add comment Signed-off-by: wjhuang2016 --- pkg/planner/core/expression_rewriter.go | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/pkg/planner/core/expression_rewriter.go b/pkg/planner/core/expression_rewriter.go index 5934500261b8d..21beef430bf88 100644 --- a/pkg/planner/core/expression_rewriter.go +++ b/pkg/planner/core/expression_rewriter.go @@ -1952,6 +1952,10 @@ func (er *expressionRewriter) castCollationForIn(colLen int, elemCnt int, stkLen return } if !collate.NewCollationEnabled() { + // See https://github.com/pingcap/tidb/issues/52772 + // This function will apply CoercibilityExplicit to the casted expression, but some checks(during ColumnSubstituteImpl) is missed when the new + // collation is disabled, then lead to panic. + // To work around this issue, we can skip the function, it should be good since the collation is disabled. return } for i := stkLen - elemCnt; i < stkLen; i++ {