From 00617c96ef752b10fb9a0f20256548dd07a86256 Mon Sep 17 00:00:00 2001 From: wjHuang Date: Tue, 31 Jan 2023 20:29:55 +0800 Subject: [PATCH] expression, cmd: fix ColumnSubstitute and allow some cases to substitute (#38826) close pingcap/tidb#38736, close pingcap/tidb#40536 --- ...lation_check_use_collation_disabled.result | 24 +++++++++++++++++++ ...llation_check_use_collation_enabled.result | 24 +++++++++++++++++++ cmd/explaintest/r/subquery.result | 10 ++++---- .../t/collation_check_use_collation.test | 14 +++++++++++ expression/collation.go | 8 ------- expression/integration_test.go | 9 +++++++ expression/util.go | 22 +++++++++++++---- planner/core/exhaust_physical_plans.go | 7 ++++-- 8 files changed, 97 insertions(+), 21 deletions(-) diff --git a/cmd/explaintest/r/collation_check_use_collation_disabled.result b/cmd/explaintest/r/collation_check_use_collation_disabled.result index 2c0bd306f445b..9e633133b1f4f 100644 --- a/cmd/explaintest/r/collation_check_use_collation_disabled.result +++ b/cmd/explaintest/r/collation_check_use_collation_disabled.result @@ -154,4 +154,28 @@ insert into t1 values ('-1'); insert into t2 values (0x2d31, ''); select * from t1, t2 where t1.a in (t2.b, 3); a b c +drop table if exists t0; +drop table if exists t1; +CREATE TABLE t0(c0 BOOL, c1 INT); +CREATE TABLE t1 LIKE t0; +CREATE VIEW v0(c0) AS SELECT IS_IPV4(t0.c1) FROM t0, t1; +INSERT INTO t0(c0, c1) VALUES (true, 0); +INSERT INTO t1(c0, c1) VALUES (true, 2); +SELECT v0.c0 FROM v0; +c0 +0 +SELECT (v0.c0)NOT LIKE(BINARY v0.c0) FROM v0; +(v0.c0)NOT LIKE(BINARY v0.c0) +0 +SELECT v0.c0 FROM v0 WHERE (v0.c0)NOT LIKE(BINARY v0.c0); +c0 +desc format='brief' SELECT v0.c0 FROM v0 WHERE (v0.c0)NOT LIKE(BINARY v0.c0); +id estRows task access object operator info +Projection 80000000.00 root is_ipv4(cast(collation_check_use_collation.t0.c1, var_string(20)))->Column#7 +└─HashJoin 80000000.00 root CARTESIAN inner join + ├─Selection(Build) 8000.00 root not(like(cast(is_ipv4(cast(collation_check_use_collation.t0.c1, var_string(20))), var_string(20)), cast(is_ipv4(cast(collation_check_use_collation.t0.c1, var_string(20))), binary(1)), 92)) + │ └─TableReader 10000.00 root data:TableFullScan + │ └─TableFullScan 10000.00 cop[tikv] table:t0 keep order:false, stats:pseudo + └─TableReader(Probe) 10000.00 root data:TableFullScan + └─TableFullScan 10000.00 cop[tikv] table:t1 keep order:false, stats:pseudo use test diff --git a/cmd/explaintest/r/collation_check_use_collation_enabled.result b/cmd/explaintest/r/collation_check_use_collation_enabled.result index 838c6beba6535..3f1113fbcd868 100644 --- a/cmd/explaintest/r/collation_check_use_collation_enabled.result +++ b/cmd/explaintest/r/collation_check_use_collation_enabled.result @@ -173,4 +173,28 @@ insert into t1 values ('-1'); insert into t2 values (0x2d31, ''); select * from t1, t2 where t1.a in (t2.b, 3); a b c +drop table if exists t0; +drop table if exists t1; +CREATE TABLE t0(c0 BOOL, c1 INT); +CREATE TABLE t1 LIKE t0; +CREATE VIEW v0(c0) AS SELECT IS_IPV4(t0.c1) FROM t0, t1; +INSERT INTO t0(c0, c1) VALUES (true, 0); +INSERT INTO t1(c0, c1) VALUES (true, 2); +SELECT v0.c0 FROM v0; +c0 +0 +SELECT (v0.c0)NOT LIKE(BINARY v0.c0) FROM v0; +(v0.c0)NOT LIKE(BINARY v0.c0) +0 +SELECT v0.c0 FROM v0 WHERE (v0.c0)NOT LIKE(BINARY v0.c0); +c0 +desc format='brief' SELECT v0.c0 FROM v0 WHERE (v0.c0)NOT LIKE(BINARY v0.c0); +id estRows task access object operator info +Projection 80000000.00 root is_ipv4(cast(collation_check_use_collation.t0.c1, var_string(20)))->Column#7 +└─HashJoin 80000000.00 root CARTESIAN inner join + ├─Selection(Build) 8000.00 root not(like(cast(is_ipv4(cast(collation_check_use_collation.t0.c1, var_string(20))), var_string(20)), cast(is_ipv4(cast(collation_check_use_collation.t0.c1, var_string(20))), binary(1)), 92)) + │ └─TableReader 10000.00 root data:TableFullScan + │ └─TableFullScan 10000.00 cop[tikv] table:t0 keep order:false, stats:pseudo + └─TableReader(Probe) 10000.00 root data:TableFullScan + └─TableFullScan 10000.00 cop[tikv] table:t1 keep order:false, stats:pseudo use test diff --git a/cmd/explaintest/r/subquery.result b/cmd/explaintest/r/subquery.result index 0cf9302f425c0..ea5a17f2ff3e3 100644 --- a/cmd/explaintest/r/subquery.result +++ b/cmd/explaintest/r/subquery.result @@ -56,13 +56,11 @@ insert into exam values(1, 'math', 100); set names utf8 collate utf8_general_ci; explain format = 'brief' select * from stu where stu.name not in (select 'guo' from exam where exam.stu_id = stu.id); id estRows task access object operator info -Apply 10000.00 root CARTESIAN anti semi join, other cond:eq(test.stu.name, Column#8) +HashJoin 8000.00 root anti semi join, equal:[eq(test.stu.id, test.exam.stu_id)], other cond:eq(test.stu.name, "guo") ├─TableReader(Build) 10000.00 root data:TableFullScan -│ └─TableFullScan 10000.00 cop[tikv] table:stu keep order:false, stats:pseudo -└─Projection(Probe) 100000.00 root guo->Column#8 - └─TableReader 100000.00 root data:Selection - └─Selection 100000.00 cop[tikv] eq(test.exam.stu_id, test.stu.id) - └─TableFullScan 100000000.00 cop[tikv] table:exam keep order:false, stats:pseudo +│ └─TableFullScan 10000.00 cop[tikv] table:exam keep order:false, stats:pseudo +└─TableReader(Probe) 10000.00 root data:TableFullScan + └─TableFullScan 10000.00 cop[tikv] table:stu keep order:false, stats:pseudo select * from stu where stu.name not in (select 'guo' from exam where exam.stu_id = stu.id); id name set names utf8mb4; diff --git a/cmd/explaintest/t/collation_check_use_collation.test b/cmd/explaintest/t/collation_check_use_collation.test index 62fbea05ae628..adcd8695b38c0 100644 --- a/cmd/explaintest/t/collation_check_use_collation.test +++ b/cmd/explaintest/t/collation_check_use_collation.test @@ -110,5 +110,19 @@ insert into t1 values ('-1'); insert into t2 values (0x2d31, ''); select * from t1, t2 where t1.a in (t2.b, 3); +# issue 38736 +drop table if exists t0; +drop table if exists t1; +CREATE TABLE t0(c0 BOOL, c1 INT); +CREATE TABLE t1 LIKE t0; +CREATE VIEW v0(c0) AS SELECT IS_IPV4(t0.c1) FROM t0, t1; +INSERT INTO t0(c0, c1) VALUES (true, 0); +INSERT INTO t1(c0, c1) VALUES (true, 2); + +SELECT v0.c0 FROM v0; +SELECT (v0.c0)NOT LIKE(BINARY v0.c0) FROM v0; +SELECT v0.c0 FROM v0 WHERE (v0.c0)NOT LIKE(BINARY v0.c0); +desc format='brief' SELECT v0.c0 FROM v0 WHERE (v0.c0)NOT LIKE(BINARY v0.c0); + # cleanup environment use test diff --git a/expression/collation.go b/expression/collation.go index eebab0aa5bc1f..8b11d3198a40e 100644 --- a/expression/collation.go +++ b/expression/collation.go @@ -299,14 +299,6 @@ func deriveCollation(ctx sessionctx.Context, funcName string, args []Expression, return ec, nil } -// DeriveCollationFromExprs derives collation information from these expressions. -// Deprecated, use CheckAndDeriveCollationFromExprs instead. -// TODO: remove this function after the all usage is replaced by CheckAndDeriveCollationFromExprs -func DeriveCollationFromExprs(ctx sessionctx.Context, exprs ...Expression) (dstCharset, dstCollation string) { - collation := inferCollation(exprs...) - return collation.Charset, collation.Collation -} - // CheckAndDeriveCollationFromExprs derives collation information from these expressions, return error if derives collation error. func CheckAndDeriveCollationFromExprs(ctx sessionctx.Context, funcName string, evalType types.EvalType, args ...Expression) (et *ExprCollation, err error) { ec := inferCollation(args...) diff --git a/expression/integration_test.go b/expression/integration_test.go index a49df593d201c..5555de7e0aa62 100644 --- a/expression/integration_test.go +++ b/expression/integration_test.go @@ -7893,3 +7893,12 @@ func TestIssue39146(t *testing.T) { tk.MustExec("set @@tidb_enable_vectorized_expression = off;") tk.MustQuery(`select str_to_date(substr(dest,1,6),'%H%i%s') from sun;`).Check(testkit.Rows("20:23:10")) } + +func TestIssue40536(t *testing.T) { + store := testkit.CreateMockStore(t) + tk := testkit.NewTestKit(t, store) + tk.MustExec("use test") + tk.MustExec("CREATE TABLE `6bf9e76d-ab44-4031-8a07-418b10741580` (\n `e0b5f703-6cfe-49b4-bc21-16a6455e43a7` set('7','va','ung60','ow','1g','gxwz5','uhnh','k','5la1','q8d9c','1f') NOT NULL DEFAULT '7,1g,uhnh,5la1,q8d9c',\n `fbc3527f-9617-4b9d-a5dc-4be31c00d8a5` datetime DEFAULT '6449-09-28 14:39:04',\n PRIMARY KEY (`e0b5f703-6cfe-49b4-bc21-16a6455e43a7`) /*T![clustered_index] CLUSTERED */\n) ENGINE=InnoDB DEFAULT CHARSET=latin1 COLLATE=latin1_bin;") + tk.MustExec("CREATE TABLE `8919f3f4-25be-4a1a-904a-bb5e863d8fc8` (\n `9804d5f2-cbc7-43b7-b241-ea2656dc941a` enum('s951','36d','ua65','49yru','6l2em','4ea','jf2d2','vprsc','3yl7n','hz','ov') DEFAULT '4ea',\n `323cdbcb-0c14-4362-90ab-ea42caaed6a5` year(4) NOT NULL DEFAULT '1983',\n `b9b70f39-1a02-4114-9d7d-fa6259c1b691` time DEFAULT '20:18:04',\n PRIMARY KEY (`323cdbcb-0c14-4362-90ab-ea42caaed6a5`) /*T![clustered_index] CLUSTERED */,\n KEY `a704d6bb-772b-44ea-8cb0-6f7491c1aaa6` (`323cdbcb-0c14-4362-90ab-ea42caaed6a5`,`9804d5f2-cbc7-43b7-b241-ea2656dc941a`)\n) ENGINE=InnoDB DEFAULT CHARSET=ascii COLLATE=ascii_bin;") + tk.MustExec("delete from `6bf9e76d-ab44-4031-8a07-418b10741580` where not( `6bf9e76d-ab44-4031-8a07-418b10741580`.`e0b5f703-6cfe-49b4-bc21-16a6455e43a7` in ( select `9804d5f2-cbc7-43b7-b241-ea2656dc941a` from `8919f3f4-25be-4a1a-904a-bb5e863d8fc8` where `6bf9e76d-ab44-4031-8a07-418b10741580`.`e0b5f703-6cfe-49b4-bc21-16a6455e43a7` in ( '1f' ) and `6bf9e76d-ab44-4031-8a07-418b10741580`.`e0b5f703-6cfe-49b4-bc21-16a6455e43a7` in ( '1g' ,'va' ,'uhnh' ) ) ) and not( IsNull( `6bf9e76d-ab44-4031-8a07-418b10741580`.`e0b5f703-6cfe-49b4-bc21-16a6455e43a7` ) );\n") +} diff --git a/expression/util.go b/expression/util.go index 3f4b826239a1d..929dce489791d 100644 --- a/expression/util.go +++ b/expression/util.go @@ -415,7 +415,6 @@ func ColumnSubstituteImpl(expr Expression, schema *Schema, newExprs []Expression if v.InOperand { newExpr = SetExprColumnInOperand(newExpr) } - newExpr.SetCoercibility(v.Coercibility()) return true, false, newExpr case *ScalarFunction: substituted := false @@ -438,7 +437,11 @@ func ColumnSubstituteImpl(expr Expression, schema *Schema, newExprs []Expression // cowExprRef is a copy-on-write util, args array allocation happens only // when expr in args is changed refExprArr := cowExprRef{v.GetArgs(), nil} - _, coll := DeriveCollationFromExprs(v.GetCtx(), v.GetArgs()...) + oldCollEt, err := CheckAndDeriveCollationFromExprs(v.GetCtx(), v.FuncName.L, v.RetType.EvalType(), v.GetArgs()...) + if err != nil { + logutil.BgLogger().Error("Unexpected error happened during ColumnSubstitution", zap.Stack("stack")) + return false, false, v + } var tmpArgForCollCheck []Expression if collate.NewCollationEnabled() { tmpArgForCollCheck = make([]Expression, len(v.GetArgs())) @@ -454,9 +457,18 @@ func ColumnSubstituteImpl(expr Expression, schema *Schema, newExprs []Expression changed = false copy(tmpArgForCollCheck, refExprArr.Result()) tmpArgForCollCheck[idx] = newFuncExpr - _, newColl := DeriveCollationFromExprs(v.GetCtx(), tmpArgForCollCheck...) - if coll == newColl { - changed = checkCollationStrictness(coll, newFuncExpr.GetType().GetCollate()) + newCollEt, err := CheckAndDeriveCollationFromExprs(v.GetCtx(), v.FuncName.L, v.RetType.EvalType(), tmpArgForCollCheck...) + if err != nil { + logutil.BgLogger().Error("Unexpected error happened during ColumnSubstitution", zap.Stack("stack")) + return false, failed, v + } + if oldCollEt.Collation == newCollEt.Collation { + if newFuncExpr.GetType().GetCollate() == arg.GetType().GetCollate() && newFuncExpr.Coercibility() == arg.Coercibility() { + // It's safe to use the new expression, otherwise some cases in projection push-down will be wrong. + changed = true + } else { + changed = checkCollationStrictness(oldCollEt.Collation, newFuncExpr.GetType().GetCollate()) + } } } hasFail = hasFail || failed || oldChanged != changed diff --git a/planner/core/exhaust_physical_plans.go b/planner/core/exhaust_physical_plans.go index 205a2d8242b4b..61575810da1fb 100644 --- a/planner/core/exhaust_physical_plans.go +++ b/planner/core/exhaust_physical_plans.go @@ -1325,8 +1325,11 @@ func (ijHelper *indexJoinBuildHelper) resetContextForIndex(innerKeys []*expressi if ijHelper.curIdxOff2KeyOff[i] >= 0 { // Don't use the join columns if their collations are unmatched and the new collation is enabled. if collate.NewCollationEnabled() && types.IsString(idxCol.RetType.GetType()) && types.IsString(outerKeys[ijHelper.curIdxOff2KeyOff[i]].RetType.GetType()) { - _, coll := expression.DeriveCollationFromExprs(nil, idxCol, outerKeys[ijHelper.curIdxOff2KeyOff[i]]) - if !collate.CompatibleCollate(idxCol.GetType().GetCollate(), coll) { + et, err := expression.CheckAndDeriveCollationFromExprs(ijHelper.innerPlan.ctx, "equal", types.ETInt, idxCol, outerKeys[ijHelper.curIdxOff2KeyOff[i]]) + if err != nil { + logutil.BgLogger().Error("Unexpected error happened during constructing index join", zap.Stack("stack")) + } + if !collate.CompatibleCollate(idxCol.GetType().GetCollate(), et.Collation) { ijHelper.curIdxOff2KeyOff[i] = -1 } }