From fda7d77bbb674f53a69e71b0c9915ed899d9ad98 Mon Sep 17 00:00:00 2001 From: yibin Date: Wed, 16 Aug 2023 16:09:31 +0800 Subject: [PATCH] expression: Fix different behaviors with MySQL when comparing datetime column with numeric constant (#46147) close pingcap/tidb#38361 --- expression/builtin_compare.go | 74 ++++++++++++++++++++---- planner/core/expression_rewriter_test.go | 43 ++++++++++++++ 2 files changed, 107 insertions(+), 10 deletions(-) diff --git a/expression/builtin_compare.go b/expression/builtin_compare.go index 12e8417e0d5b2..4b2a220db33bb 100644 --- a/expression/builtin_compare.go +++ b/expression/builtin_compare.go @@ -1556,6 +1556,11 @@ func RefineComparedConstant(ctx sessionctx.Context, targetFieldType types.FieldT return con, false } +func matchRefineRule3Pattern(conEvalType types.EvalType, exprType *types.FieldType) bool { + return (exprType.GetType() == mysql.TypeTimestamp || exprType.GetType() == mysql.TypeDatetime) && + (conEvalType == types.ETReal || conEvalType == types.ETDecimal || conEvalType == types.ETInt) +} + // Since the argument refining of cmp functions can bring some risks to the plan-cache, the optimizer // needs to decide to whether to skip the refining or skip plan-cache for safety. // For example, `unsigned_int_col > ?(-1)` can be refined to `True`, but the validation of this result @@ -1565,9 +1570,10 @@ func allowCmpArgsRefining4PlanCache(ctx sessionctx.Context, args []Expression) ( return true // plan-cache disabled or no parameter in these args } - // For these 2 cases below, we skip the refining: + // For these 3 cases below, we apply the refining: // 1. year-expr const // 2. int-expr string/float/double/decimal-const + // 3. datetime/timestamp column int/float/double/decimal-const for conIdx := 0; conIdx < 2; conIdx++ { if _, isCon := args[conIdx].(*Constant); !isCon { continue // not a constant @@ -1577,14 +1583,26 @@ func allowCmpArgsRefining4PlanCache(ctx sessionctx.Context, args []Expression) ( // refine `year < 12` to `year < 2012` to guarantee the correctness. // see https://github.com/pingcap/tidb/issues/41626 for more details. exprType := args[1-conIdx].GetType() + exprEvalType := exprType.EvalType() if exprType.GetType() == mysql.TypeYear { ctx.GetSessionVars().StmtCtx.SkipPlanCache = true return true } - conType := args[conIdx].GetType().EvalType() - if exprType.EvalType() == types.ETInt && - (conType == types.ETString || conType == types.ETReal || conType == types.ETDecimal) { + // case 2: int-expr string/float/double/decimal-const + // refine `int_key < 1.1` to `int_key < 2` to generate RangeScan instead of FullScan. + conEvalType := args[conIdx].GetType().EvalType() + if exprEvalType == types.ETInt && + (conEvalType == types.ETString || conEvalType == types.ETReal || conEvalType == types.ETDecimal) { + ctx.GetSessionVars().StmtCtx.SkipPlanCache = true + return true + } + + // case 3: datetime/timestamp column int/float/double/decimal-const + // try refine numeric-const to timestamp const + // see https://github.com/pingcap/tidb/issues/38361 for more details + _, exprIsCon := args[1-conIdx].(*Constant) + if !exprIsCon && matchRefineRule3Pattern(conEvalType, exprType) { ctx.GetSessionVars().StmtCtx.SkipPlanCache = true return true } @@ -1593,15 +1611,17 @@ func allowCmpArgsRefining4PlanCache(ctx sessionctx.Context, args []Expression) ( return false } -// refineArgs will rewrite the arguments if the compare expression is `int column non-int constant` or -// `non-int constant int column`. E.g., `a < 1.1` will be rewritten to `a < 2`. It also handles comparing year type -// with int constant if the int constant falls into a sensible year representation. -// This refine operation depends on the values of these args, but these values can change when using plan-cache. +// refineArgs will rewrite the arguments if the compare expression is +// 1. `int column non-int constant` or `non-int constant int column`. E.g., `a < 1.1` will be rewritten to `a < 2`. +// 2. It also handles comparing year type with int constant if the int constant falls into a sensible year representation. +// 3. It also handles comparing datetime/timestamp column with numeric constant, try to cast numeric constant as timestamp type, do nothing if failed. +// This refining operation depends on the values of these args, but these values can change when using plan-cache. // So we have to skip this operation or mark the plan as over-optimized when using plan-cache. func (c *compareFunctionClass) refineArgs(ctx sessionctx.Context, args []Expression) []Expression { arg0Type, arg1Type := args[0].GetType(), args[1].GetType() - arg0IsInt := arg0Type.EvalType() == types.ETInt - arg1IsInt := arg1Type.EvalType() == types.ETInt + arg0EvalType, arg1EvalType := arg0Type.EvalType(), arg1Type.EvalType() + arg0IsInt := arg0EvalType == types.ETInt + arg1IsInt := arg1EvalType == types.ETInt arg0, arg0IsCon := args[0].(*Constant) arg1, arg1IsCon := args[1].(*Constant) isExceptional, finalArg0, finalArg1 := false, args[0], args[1] @@ -1613,6 +1633,14 @@ func (c *compareFunctionClass) refineArgs(ctx sessionctx.Context, args []Express // We should remove the mutable constant for correctness, because its value may be changed. RemoveMutableConst(ctx, args) + if arg0IsCon && !arg1IsCon && matchRefineRule3Pattern(arg0EvalType, arg1Type) { + return c.refineNumericConstantCmpDatetime(ctx, args, arg0, 0) + } + + if !arg0IsCon && arg1IsCon && matchRefineRule3Pattern(arg1EvalType, arg0Type) { + return c.refineNumericConstantCmpDatetime(ctx, args, arg1, 1) + } + // int non-constant [cmp] non-int constant if arg0IsInt && !arg0IsCon && !arg1IsInt && arg1IsCon { arg1, isExceptional = RefineComparedConstant(ctx, *arg0Type, arg1, c.op) @@ -1657,6 +1685,7 @@ func (c *compareFunctionClass) refineArgs(ctx sessionctx.Context, args []Express } } } + // int constant [cmp] year type if arg0IsCon && arg0IsInt && arg1Type.GetType() == mysql.TypeYear && !arg0.Value.IsNull() { adjusted, failed := types.AdjustYear(arg0.Value.GetInt64(), false) @@ -1695,6 +1724,31 @@ func (c *compareFunctionClass) refineArgs(ctx sessionctx.Context, args []Express return c.refineArgsByUnsignedFlag(ctx, []Expression{finalArg0, finalArg1}) } +// see https://github.com/pingcap/tidb/issues/38361 for more details +func (c *compareFunctionClass) refineNumericConstantCmpDatetime(ctx sessionctx.Context, args []Expression, constArg *Constant, constArgIdx int) []Expression { + dt, err := constArg.Eval(chunk.Row{}) + if err != nil || dt.IsNull() { + return args + } + sc := ctx.GetSessionVars().StmtCtx + var datetimeDatum types.Datum + targetFieldType := types.NewFieldType(mysql.TypeDatetime) + datetimeDatum, err = dt.ConvertTo(sc, targetFieldType) + if err != nil || datetimeDatum.IsNull() { + return args + } + finalArg := Constant{ + Value: datetimeDatum, + RetType: targetFieldType, + DeferredExpr: nil, + ParamMarker: nil, + } + if constArgIdx == 0 { + return []Expression{&finalArg, args[1]} + } + return []Expression{args[0], &finalArg} +} + func (c *compareFunctionClass) refineArgsByUnsignedFlag(ctx sessionctx.Context, args []Expression) []Expression { // Only handle int cases, cause MySQL declares that `UNSIGNED` is deprecated for FLOAT, DOUBLE and DECIMAL types, // and support for it would be removed in a future version. diff --git a/planner/core/expression_rewriter_test.go b/planner/core/expression_rewriter_test.go index ee5f20d3f824c..7dd6a37bf08d1 100644 --- a/planner/core/expression_rewriter_test.go +++ b/planner/core/expression_rewriter_test.go @@ -421,3 +421,46 @@ func TestMultiColInExpression(t *testing.T) { tk.MustQuery(tt).Sort().Check(testkit.Rows(output[i].Res...)) } } + +func TestCompareIssue38361(t *testing.T) { + store, clean := testkit.CreateMockStore(t) + defer clean() + + tk := testkit.NewTestKit(t, store) + tk.MustExec("drop database if exists TEST1") + tk.MustExec("create database TEST1") + tk.MustExec("use TEST1") + tk.MustExec("create table t(a datetime, b bigint, c bigint)") + tk.MustExec("insert into t values(cast('2023-08-09 00:00:00' as datetime), 20230809, 20231310)") + + tk.MustQuery("select a > 20230809 from t").Check(testkit.Rows("0")) + tk.MustQuery("select a = 20230809 from t").Check(testkit.Rows("1")) + tk.MustQuery("select a < 20230810 from t").Check(testkit.Rows("1")) + // 20231310 can't be converted to valid datetime, thus should be compared using real date type,and datetime will be + // converted to something like 'YYYYMMDDHHMMSS', bigger than 20231310 + tk.MustQuery("select a < 20231310 from t").Check(testkit.Rows("0")) + tk.MustQuery("select 20230809 < a from t").Check(testkit.Rows("0")) + tk.MustQuery("select 20230809 = a from t").Check(testkit.Rows("1")) + tk.MustQuery("select 20230810 > a from t").Check(testkit.Rows("1")) + tk.MustQuery("select 20231310 > a from t").Check(testkit.Rows("0")) + + // constant datetime cmp numeric constant should be compared as real data type + tk.MustQuery("select cast('2023-08-09 00:00:00' as datetime) > 20230809 from t").Check(testkit.Rows("1")) + tk.MustQuery("select cast('2023-08-09 00:00:00' as datetime) = 20230809 from t").Check(testkit.Rows("0")) + tk.MustQuery("select cast('2023-08-09 00:00:00' as datetime) < 20230810 from t").Check(testkit.Rows("0")) + tk.MustQuery("select cast('2023-08-09 00:00:00' as datetime) < 20231310 from t").Check(testkit.Rows("0")) + tk.MustQuery("select 20230809 < cast('2023-08-09 00:00:00' as datetime) from t").Check(testkit.Rows("1")) + tk.MustQuery("select 20230809 = cast('2023-08-09 00:00:00' as datetime) from t").Check(testkit.Rows("0")) + tk.MustQuery("select 20230810 > cast('2023-08-09 00:00:00' as datetime) from t").Check(testkit.Rows("0")) + tk.MustQuery("select 20231310 > cast('2023-08-09 00:00:00' as datetime) from t").Check(testkit.Rows("0")) + + // datetime column cmp numeric column should be compared as real data type + tk.MustQuery("select a > b from t").Check(testkit.Rows("1")) + tk.MustQuery("select a = b from t").Check(testkit.Rows("0")) + tk.MustQuery("select a < b + 1 from t").Check(testkit.Rows("0")) + tk.MustQuery("select a < c from t").Check(testkit.Rows("0")) + tk.MustQuery("select b < a from t").Check(testkit.Rows("1")) + tk.MustQuery("select b = a from t").Check(testkit.Rows("0")) + tk.MustQuery("select b > a from t").Check(testkit.Rows("0")) + tk.MustQuery("select c > a from t").Check(testkit.Rows("0")) +}