Skip to content

Commit

Permalink
planner,expression: Limit projection pushed down scope to avoid poten…
Browse files Browse the repository at this point in the history
…tial performance degradation (#53650)

close #53613
  • Loading branch information
yibin87 authored Jun 4, 2024
1 parent be86a25 commit d5e9c6e
Show file tree
Hide file tree
Showing 7 changed files with 201 additions and 44 deletions.
51 changes: 51 additions & 0 deletions pkg/expression/util.go
Original file line number Diff line number Diff line change
Expand Up @@ -1454,6 +1454,57 @@ func ContainCorrelatedColumn(exprs []Expression) bool {
return false
}

// jsonUnquoteFunctionBenefitsFromPushedDown reports whether the given JSONUnquote call has the
// shape JSONUnquote(CAST(JSONExtract(...) AS string)), which is what the `->>` operator is
// parsed into. Only that shape is treated as pushable to TiKV; every other argument shape
// yields false.
func jsonUnquoteFunctionBenefitsFromPushedDown(sf *ScalarFunction) bool {
	// The first argument must itself be a CAST scalar function.
	castFn, isFunc := sf.GetArgs()[0].(*ScalarFunction)
	if !isFunc || castFn.FuncName.L != ast.Cast {
		return false
	}
	// And the CAST must wrap a JSONExtract scalar function.
	extractFn, isFunc := castFn.GetArgs()[0].(*ScalarFunction)
	return isFunc && extractFn.FuncName.L == ast.JSONExtract
}

// ProjectionBenefitsFromPushedDown reports whether pushing the projection expressions down to
// TiKV is expected to improve performance.
// Projections are not pushed down to TiKV by default, so the check is deliberately strict to
// avoid potential performance degradation:
//   - every expression must be either a plain column reference or one of the listed JSON
//     functions (JSONUnquote only in the form produced by `->>`);
//   - when all expressions are plain column references, the projection is only worth pushing
//     down if it prunes columns, i.e. it outputs fewer columns than inputSchemaLen.
//
// Note: virtual columns are not considered here, since this function cares about performance
// rather than functionality.
func ProjectionBenefitsFromPushedDown(exprs []Expression, inputSchemaLen int) bool {
	plainCols := 0
	for _, e := range exprs {
		if _, isCol := e.(*Column); isCol {
			plainCols++
			continue
		}
		fn, isFunc := e.(*ScalarFunction)
		if !isFunc {
			// Neither a column nor a scalar function: not considered beneficial.
			return false
		}
		switch fn.FuncName.L {
		case ast.JSONUnquote:
			if !jsonUnquoteFunctionBenefitsFromPushedDown(fn) {
				return false
			}
		case ast.JSONDepth, ast.JSONLength, ast.JSONType, ast.JSONValid, ast.JSONContains, ast.JSONContainsPath,
			ast.JSONExtract, ast.JSONKeys, ast.JSONSearch, ast.JSONMemberOf, ast.JSONOverlaps:
			// Accepted as-is; move on to the next expression.
		default:
			return false
		}
	}
	// Reaching this point with every expression counted as a column means the projection is
	// pure column references; only push it down when it actually prunes columns.
	if plainCols == len(exprs) {
		return plainCols < inputSchemaLen
	}
	return true
}

// MaybeOverOptimized4PlanCache used to check whether an optimization can work
// for the statement when we enable the plan cache.
// In some situations, some optimizations maybe over-optimize and cache an
Expand Down
37 changes: 37 additions & 0 deletions pkg/expression/util_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -457,6 +457,43 @@ func TestSQLDigestTextRetriever(t *testing.T) {
require.Equal(t, expectedGlobalResult, r.SQLDigestsMap)
}

// TestProjectionBenefitsFromPushedDown checks ProjectionBenefitsFromPushedDown against
// column-only projections (pruning and non-pruning), the accepted JSON functions, and the two
// JSONUnquote shapes (plain argument vs. CAST(JSONExtract(...))).
func TestProjectionBenefitsFromPushedDown(t *testing.T) {
	type testDataType struct {
		exprs          []Expression
		inputSchemaLen int
		expectResult   bool
	}
	// Build CAST(JSONExtract(json, str) AS string); fail fast if construction breaks instead of
	// silently continuing with a nil expression.
	castFunc, err := NewFunction(mock.NewContext(), ast.Cast, types.NewFieldType(mysql.TypeString), newFunctionWithMockCtx(ast.JSONExtract, newColJSON(), newColString("str", "binary")))
	require.NoError(t, err)
	testDataArray := []testDataType{
		// Column-only projection that prunes columns (2 of 5).
		{[]Expression{newColumn(0), newColumn(1)}, 5, true},
		// Column-only projection that prunes nothing (2 of 2).
		{[]Expression{newColumn(0), newColumn(1)}, 2, false},
		// A column mixed with every accepted JSON function.
		{[]Expression{
			newColumn(0),
			newFunctionWithMockCtx(ast.JSONExtract, newColJSON(), newColString("str", "binary")),
			newFunctionWithMockCtx(ast.JSONDepth, newColJSON()),
			newFunctionWithMockCtx(ast.JSONLength, newColJSON()),
			newFunctionWithMockCtx(ast.JSONType, newColJSON()),
			newFunctionWithMockCtx(ast.JSONValid, newColJSON()),
			newFunctionWithMockCtx(ast.JSONContains, newColJSON(), newColString("str", "binary")),
			newFunctionWithMockCtx(ast.JSONContainsPath, newColJSON(), newConstString("str", CoercibilityNone, "str", "binary"), newColString("str", "binary"), newColString("str", "binary")),
			newFunctionWithMockCtx(ast.JSONKeys, newColJSON()),
			newFunctionWithMockCtx(ast.JSONSearch, newColJSON(), newConstString("str", CoercibilityNone, "str", "binary"), newColString("str", "binary")),
			newFunctionWithMockCtx(ast.JSONMemberOf, newColString("str", "binary"), newColJSON()),
			newFunctionWithMockCtx(ast.JSONOverlaps, newColJSON(), newColJSON()),
		}, 3, true},
		// JSONUnquote over a plain column is rejected.
		{[]Expression{
			newFunctionWithMockCtx(ast.JSONUnquote, newColString("str", "binary")),
		}, 3, false},
		// JSONUnquote over CAST(JSONExtract(...)) is accepted.
		{[]Expression{
			newFunctionWithMockCtx(ast.JSONUnquote, castFunc),
		}, 3, true},
	}
	for i, testData := range testDataArray {
		result := ProjectionBenefitsFromPushedDown(testData.exprs, testData.inputSchemaLen)
		// require.Equal takes (expected, actual); the case index makes a failure easy to locate.
		require.Equal(t, testData.expectResult, result, "case %d", i)
	}
}

func BenchmarkExtractColumns(b *testing.B) {
conditions := []Expression{
newFunctionWithMockCtx(ast.EQ, newColumn(0), newColumn(1)),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -73,9 +73,10 @@
{
"SQL": "desc format = 'brief' select md5(s) from t; -- we do generate mpp plan, while the cost-cmp failed",
"Plan": [
"TableReader 10000.00 root data:Projection",
"└─Projection 10000.00 cop[tikv] md5(test.t.s)->Column#13",
" └─TableFullScan 10000.00 cop[tikv] table:t keep order:false, stats:pseudo"
"Projection 10000.00 root md5(test.t.s)->Column#13",
"└─TableReader 10000.00 root MppVersion: 2, data:ExchangeSender",
" └─ExchangeSender 10000.00 mpp[tiflash] ExchangeType: PassThrough",
" └─TableFullScan 10000.00 mpp[tiflash] table:t keep order:false, stats:pseudo"
]
},
{
Expand Down
3 changes: 2 additions & 1 deletion pkg/planner/core/exhaust_physical_plans.go
Original file line number Diff line number Diff line change
Expand Up @@ -2767,7 +2767,8 @@ func (p *LogicalProjection) ExhaustPhysicalPlans(prop *property.PhysicalProperty
newProps = append(newProps, mppProp)
}
if newProp.TaskTp != property.CopSingleReadTaskType && ctx.GetSessionVars().AllowProjectionPushDown && p.CanPushToCop(kv.TiKV) &&
expression.CanExprsPushDown(pushDownCtx, p.Exprs, kv.TiKV) && !expression.ContainVirtualColumn(p.Exprs) {
expression.CanExprsPushDown(pushDownCtx, p.Exprs, kv.TiKV) && !expression.ContainVirtualColumn(p.Exprs) &&
expression.ProjectionBenefitsFromPushedDown(p.Exprs, p.Children()[0].Schema().Len()) {
copProp := newProp.CloneEssentialFields()
copProp.TaskTp = property.CopSingleReadTaskType
newProps = append(newProps, copProp)
Expand Down
20 changes: 0 additions & 20 deletions pkg/planner/core/integration_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1188,26 +1188,6 @@ func TestIssue33175(t *testing.T) {
tk.MustQuery("select * from tmp2 where id <= -1 or id > 0 order by id asc;").Check(testkit.Rows("-2", "-1", "1", "2"))
}

// TestIssue35083 enables projection push down through the global config and the
// TiDBOptProjectionPushDown system variable, then checks that the explain output for
// `select cast(a as datetime) from t1` places the Projection in a cop[tikv] task.
func TestIssue35083(t *testing.T) {
// Deferred calls run in LIFO order: the config restore registered below runs first, and
// this closure then re-derives the system variable from the global config.
defer func() {
variable.SetSysVar(variable.TiDBOptProjectionPushDown, variable.BoolToOnOff(config.GetGlobalConfig().Performance.ProjectionPushDown))
}()
// NOTE(review): presumably RestoreFunc snapshots the current global config and returns a
// function that restores it; confirm against the config package.
defer config.RestoreFunc()()
config.UpdateGlobal(func(conf *config.Config) {
conf.Performance.ProjectionPushDown = true
})
// Sync the system variable with the config value that was just updated.
variable.SetSysVar(variable.TiDBOptProjectionPushDown, variable.BoolToOnOff(config.GetGlobalConfig().Performance.ProjectionPushDown))
store := testkit.CreateMockStore(t)
tk := testkit.NewTestKit(t, store)
tk.MustExec("use test")
tk.MustExec("create table t1 (a varchar(100), b int)")
// Check that the new session reports projection push down as enabled.
tk.MustQuery("select @@tidb_opt_projection_push_down").Check(testkit.Rows("1"))
// Expected plan: the cast projection sits below TableReader as a cop[tikv] operator.
tk.MustQuery("explain format = 'brief' select cast(a as datetime) from t1").Check(testkit.Rows(
"TableReader 10000.00 root data:Projection",
"└─Projection 10000.00 cop[tikv] cast(test.t1.a, datetime BINARY)->Column#4",
" └─TableFullScan 10000.00 cop[tikv] table:t1 keep order:false, stats:pseudo"))
}

func TestRepeatPushDownToTiFlash(t *testing.T) {
store := testkit.CreateMockStore(t)
tk := testkit.NewTestKit(t, store)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -37,22 +37,93 @@ Limit 1.00 root offset:2, count:1
set tidb_cost_model_version=2;
drop table if exists t;
create table t (a int, b real, i int, id int, value decimal(6,3), name char(128), d decimal(6,3), s char(128), t datetime, c bigint as ((a+1)) virtual, e real as ((b+a)));
create table t2(a json);
analyze table t;
analyze table t2;
set session tidb_opt_projection_push_down=1;
desc format = 'brief' select i * 2 from t;
desc format = 'brief' select a from t2;
id estRows task access object operator info
TableReader 10000.00 root data:TableFullScan
└─TableFullScan 10000.00 cop[tikv] table:t2 keep order:false, stats:pseudo
desc format = 'brief' select a->'$.key0' from t2;
id estRows task access object operator info
TableReader 10000.00 root data:Projection
└─Projection 10000.00 cop[tikv] mul(planner__core__casetest__pushdown__push_down.t.i, 2)->Column#13
└─Projection 10000.00 cop[tikv] json_extract(planner__core__casetest__pushdown__push_down.t2.a, $.key0)->Column#3
└─TableFullScan 10000.00 cop[tikv] table:t2 keep order:false, stats:pseudo
desc format = 'brief' select a->>'$.key0' from t2;
id estRows task access object operator info
TableReader 10000.00 root data:Projection
└─Projection 10000.00 cop[tikv] json_unquote(cast(json_extract(planner__core__casetest__pushdown__push_down.t2.a, $.key0), var_string(16777216)))->Column#3
└─TableFullScan 10000.00 cop[tikv] table:t2 keep order:false, stats:pseudo
desc format = 'brief' select json_length(a) from t2;
id estRows task access object operator info
TableReader 10000.00 root data:Projection
└─Projection 10000.00 cop[tikv] json_length(planner__core__casetest__pushdown__push_down.t2.a)->Column#3
└─TableFullScan 10000.00 cop[tikv] table:t2 keep order:false, stats:pseudo
desc format = 'brief' select json_valid(a) from t2;
id estRows task access object operator info
TableReader 10000.00 root data:Projection
└─Projection 10000.00 cop[tikv] json_valid(planner__core__casetest__pushdown__push_down.t2.a)->Column#3
└─TableFullScan 10000.00 cop[tikv] table:t2 keep order:false, stats:pseudo
desc format = 'brief' select json_type(a) from t2;
id estRows task access object operator info
TableReader 10000.00 root data:Projection
└─Projection 10000.00 cop[tikv] json_type(planner__core__casetest__pushdown__push_down.t2.a)->Column#3
└─TableFullScan 10000.00 cop[tikv] table:t2 keep order:false, stats:pseudo
desc format = 'brief' select json_depth(a) from t2;
id estRows task access object operator info
Projection 10000.00 root json_depth(planner__core__casetest__pushdown__push_down.t2.a)->Column#3
└─TableReader 10000.00 root data:TableFullScan
└─TableFullScan 10000.00 cop[tikv] table:t2 keep order:false, stats:pseudo
desc format = 'brief' select json_contains(a, '$.key0')from t2;
id estRows task access object operator info
TableReader 10000.00 root data:Projection
└─Projection 10000.00 cop[tikv] json_contains(planner__core__casetest__pushdown__push_down.t2.a, cast($.key0, json BINARY))->Column#3
└─TableFullScan 10000.00 cop[tikv] table:t2 keep order:false, stats:pseudo
desc format = 'brief' select json_contains_path(a, 'one', '$.key0', '$.key1') from t2;
id estRows task access object operator info
Projection 10000.00 root json_contains_path(planner__core__casetest__pushdown__push_down.t2.a, one, $.key0, $.key1)->Column#3
└─TableReader 10000.00 root data:TableFullScan
└─TableFullScan 10000.00 cop[tikv] table:t2 keep order:false, stats:pseudo
desc format = 'brief' select json_keys(a) from t2;
id estRows task access object operator info
Projection 10000.00 root json_keys(planner__core__casetest__pushdown__push_down.t2.a)->Column#3
└─TableReader 10000.00 root data:TableFullScan
└─TableFullScan 10000.00 cop[tikv] table:t2 keep order:false, stats:pseudo
desc format = 'brief' select 'ab' MEMBER OF(a) from t2;
id estRows task access object operator info
TableReader 10000.00 root data:Projection
└─Projection 10000.00 cop[tikv] json_memberof(cast(ab, json BINARY), planner__core__casetest__pushdown__push_down.t2.a)->Column#3
└─TableFullScan 10000.00 cop[tikv] table:t2 keep order:false, stats:pseudo
desc format = 'brief' select json_overlaps(a, a) from t2;
id estRows task access object operator info
Projection 10000.00 root json_overlaps(planner__core__casetest__pushdown__push_down.t2.a, planner__core__casetest__pushdown__push_down.t2.a)->Column#3
└─TableReader 10000.00 root data:TableFullScan
└─TableFullScan 10000.00 cop[tikv] table:t2 keep order:false, stats:pseudo
desc format = 'brief' select json_search(a, 'one', 'key0') from t2;
id estRows task access object operator info
Projection 10000.00 root json_search(planner__core__casetest__pushdown__push_down.t2.a, one, key0)->Column#3
└─TableReader 10000.00 root data:TableFullScan
└─TableFullScan 10000.00 cop[tikv] table:t2 keep order:false, stats:pseudo
desc format = 'brief' select json_unquote(a) from t2;
id estRows task access object operator info
Projection 10000.00 root json_unquote(cast(planner__core__casetest__pushdown__push_down.t2.a, var_string(4294967295)))->Column#3
└─TableReader 10000.00 root data:TableFullScan
└─TableFullScan 10000.00 cop[tikv] table:t2 keep order:false, stats:pseudo
desc format = 'brief' select i * 2 from t;
id estRows task access object operator info
Projection 10000.00 root mul(planner__core__casetest__pushdown__push_down.t.i, 2)->Column#13
└─TableReader 10000.00 root data:TableFullScan
└─TableFullScan 10000.00 cop[tikv] table:t keep order:false, stats:pseudo
desc format = 'brief' select DATE_FORMAT(t, '%Y-%m-%d %H') as date from t;
id estRows task access object operator info
TableReader 10000.00 root data:Projection
└─Projection 10000.00 cop[tikv] date_format(planner__core__casetest__pushdown__push_down.t.t, %Y-%m-%d %H)->Column#13
Projection 10000.00 root date_format(planner__core__casetest__pushdown__push_down.t.t, %Y-%m-%d %H)->Column#13
└─TableReader 10000.00 root data:TableFullScan
└─TableFullScan 10000.00 cop[tikv] table:t keep order:false, stats:pseudo
desc format = 'brief' select md5(s) from t;
id estRows task access object operator info
TableReader 10000.00 root data:Projection
└─Projection 10000.00 cop[tikv] md5(planner__core__casetest__pushdown__push_down.t.s)->Column#13
Projection 10000.00 root md5(planner__core__casetest__pushdown__push_down.t.s)->Column#13
└─TableReader 10000.00 root data:TableFullScan
└─TableFullScan 10000.00 cop[tikv] table:t keep order:false, stats:pseudo
desc format = 'brief' select c from t where a+1=3;
id estRows task access object operator info
Expand Down Expand Up @@ -99,19 +170,19 @@ StreamAgg 1.00 root funcs:sum(Column#16)->Column#14
desc format = 'brief' select * from (select id-2 as b from t) B join (select id-2 as b from t) A on A.b=B.b;
id estRows task access object operator info
HashJoin 10000.00 root inner join, equal:[eq(Column#13, Column#26)]
├─TableReader(Build) 8000.00 root data:Projection
│ └─Projection 8000.00 cop[tikv] minus(planner__core__casetest__pushdown__push_down.t.id, 2)->Column#26
├─Projection(Build) 8000.00 root minus(planner__core__casetest__pushdown__push_down.t.id, 2)->Column#26
│ └─TableReader 8000.00 root data:Selection
│ └─Selection 8000.00 cop[tikv] not(isnull(minus(planner__core__casetest__pushdown__push_down.t.id, 2)))
│ └─TableFullScan 10000.00 cop[tikv] table:t keep order:false, stats:pseudo
└─TableReader(Probe) 8000.00 root data:Projection
└─Projection 8000.00 cop[tikv] minus(planner__core__casetest__pushdown__push_down.t.id, 2)->Column#13
└─Projection(Probe) 8000.00 root minus(planner__core__casetest__pushdown__push_down.t.id, 2)->Column#13
└─TableReader 8000.00 root data:Selection
└─Selection 8000.00 cop[tikv] not(isnull(minus(planner__core__casetest__pushdown__push_down.t.id, 2)))
└─TableFullScan 10000.00 cop[tikv] table:t keep order:false, stats:pseudo
desc format = 'brief' select * from t join (select id-2 as b from t) A on A.b=t.id;
id estRows task access object operator info
HashJoin 10000.00 root inner join, equal:[eq(planner__core__casetest__pushdown__push_down.t.id, Column#25)]
├─TableReader(Build) 8000.00 root data:Projection
│ └─Projection 8000.00 cop[tikv] minus(planner__core__casetest__pushdown__push_down.t.id, 2)->Column#25
├─Projection(Build) 8000.00 root minus(planner__core__casetest__pushdown__push_down.t.id, 2)->Column#25
│ └─TableReader 8000.00 root data:Selection
│ └─Selection 8000.00 cop[tikv] not(isnull(minus(planner__core__casetest__pushdown__push_down.t.id, 2)))
│ └─TableFullScan 10000.00 cop[tikv] table:t keep order:false, stats:pseudo
└─TableReader(Probe) 9990.00 root data:Selection
Expand All @@ -120,17 +191,17 @@ HashJoin 10000.00 root inner join, equal:[eq(planner__core__casetest__pushdown_
desc format = 'brief' select * from t left join (select id-2 as b from t) A on A.b=t.id;
id estRows task access object operator info
HashJoin 10000.00 root left outer join, equal:[eq(planner__core__casetest__pushdown__push_down.t.id, Column#25)]
├─TableReader(Build) 8000.00 root data:Projection
│ └─Projection 8000.00 cop[tikv] minus(planner__core__casetest__pushdown__push_down.t.id, 2)->Column#25
├─Projection(Build) 8000.00 root minus(planner__core__casetest__pushdown__push_down.t.id, 2)->Column#25
│ └─TableReader 8000.00 root data:Selection
│ └─Selection 8000.00 cop[tikv] not(isnull(minus(planner__core__casetest__pushdown__push_down.t.id, 2)))
│ └─TableFullScan 10000.00 cop[tikv] table:t keep order:false, stats:pseudo
└─TableReader(Probe) 10000.00 root data:TableFullScan
└─TableFullScan 10000.00 cop[tikv] table:t keep order:false, stats:pseudo
desc format = 'brief' select * from t right join (select id-2 as b from t) A on A.b=t.id;
id estRows task access object operator info
HashJoin 12487.50 root right outer join, equal:[eq(planner__core__casetest__pushdown__push_down.t.id, Column#25)]
├─TableReader(Build) 10000.00 root data:Projection
│ └─Projection 10000.00 cop[tikv] minus(planner__core__casetest__pushdown__push_down.t.id, 2)->Column#25
├─Projection(Build) 10000.00 root minus(planner__core__casetest__pushdown__push_down.t.id, 2)->Column#25
│ └─TableReader 10000.00 root data:TableFullScan
│ └─TableFullScan 10000.00 cop[tikv] table:t keep order:false, stats:pseudo
└─TableReader(Probe) 9990.00 root data:Selection
└─Selection 9990.00 cop[tikv] not(isnull(planner__core__casetest__pushdown__push_down.t.id))
Expand All @@ -139,12 +210,12 @@ desc format = 'brief' select A.b, B.b from (select id-2 as b from t) B join (sel
id estRows task access object operator info
Projection 10000.00 root Column#26, Column#13
└─HashJoin 10000.00 root inner join, equal:[eq(Column#13, Column#26)]
├─TableReader(Build) 8000.00 root data:Projection
│ └─Projection 8000.00 cop[tikv] minus(planner__core__casetest__pushdown__push_down.t.id, 2)->Column#26
├─Projection(Build) 8000.00 root minus(planner__core__casetest__pushdown__push_down.t.id, 2)->Column#26
│ └─TableReader 8000.00 root data:Selection
│ └─Selection 8000.00 cop[tikv] not(isnull(minus(planner__core__casetest__pushdown__push_down.t.id, 2)))
│ └─TableFullScan 10000.00 cop[tikv] table:t keep order:false, stats:pseudo
└─TableReader(Probe) 8000.00 root data:Projection
└─Projection 8000.00 cop[tikv] minus(planner__core__casetest__pushdown__push_down.t.id, 2)->Column#13
└─Projection(Probe) 8000.00 root minus(planner__core__casetest__pushdown__push_down.t.id, 2)->Column#13
└─TableReader 8000.00 root data:Selection
└─Selection 8000.00 cop[tikv] not(isnull(minus(planner__core__casetest__pushdown__push_down.t.id, 2)))
└─TableFullScan 10000.00 cop[tikv] table:t keep order:false, stats:pseudo
desc format = 'brief' select A.id from t as A where exists (select 1 from t where t.id=A.id);
Expand Down
Loading

0 comments on commit d5e9c6e

Please sign in to comment.