Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

planner: fix incorrect maintenance of handleColHelper for recursive CTE #55732

Merged
merged 13 commits into from
Sep 6, 2024
40 changes: 35 additions & 5 deletions pkg/planner/core/logical_plan_builder.go
Original file line number Diff line number Diff line change
Expand Up @@ -4168,6 +4168,9 @@ func getLatestVersionFromStatsTable(ctx sessionctx.Context, tblInfo *model.Table
return version
}

// tryBuildCTE considers the input tn as a reference to a CTE and tries to build the logical plan for it like building
// DataSource for normal tables.
// tryBuildCTE will push an entry into handleHelper when successful.
func (b *PlanBuilder) tryBuildCTE(ctx context.Context, tn *ast.TableName, asName *model.CIStr) (base.LogicalPlan, error) {
for i := len(b.outerCTEs) - 1; i >= 0; i-- {
cte := b.outerCTEs[i]
Expand All @@ -4192,6 +4195,7 @@ func (b *PlanBuilder) tryBuildCTE(ctx context.Context, tn *ast.TableName, asName
p := logicalop.LogicalCTETable{Name: cte.def.Name.String(), IDForStorage: cte.storageID, SeedStat: cte.seedStat, SeedSchema: cte.seedLP.Schema()}.Init(b.ctx, b.getSelectOffset())
p.SetSchema(getResultCTESchema(cte.seedLP.Schema(), b.ctx.GetSessionVars()))
p.SetOutputNames(cte.seedLP.OutputNames())
b.handleHelper.pushMap(nil)
return p, nil
}

Expand Down Expand Up @@ -6895,6 +6899,8 @@ func isJoinHintSupportedInMPPMode(preferJoinType uint) bool {
return onesCount < 1
}

// buildCte prepares for a CTE. It works together with buildWith().
// It will push one entry into b.handleHelper.
func (b *PlanBuilder) buildCte(ctx context.Context, cte *ast.CommonTableExpression, isRecursive bool) (p base.LogicalPlan, err error) {
saveBuildingCTE := b.buildingCTE
b.buildingCTE = true
Expand Down Expand Up @@ -6930,6 +6936,7 @@ func (b *PlanBuilder) buildCte(ctx context.Context, cte *ast.CommonTableExpressi
}

// buildRecursiveCTE handles the with clause `with recursive xxx as xx`.
// It will push one entry into b.handleHelper.
func (b *PlanBuilder) buildRecursiveCTE(ctx context.Context, cte ast.ResultSetNode) error {
b.isCTE = true
cInfo := b.outerCTEs[len(b.outerCTEs)-1]
Expand Down Expand Up @@ -6963,10 +6970,33 @@ func (b *PlanBuilder) buildRecursiveCTE(ctx context.Context, cte ast.ResultSetNo
var afterOpr *ast.SetOprType
switch y := x.SelectList.Selects[i].(type) {
case *ast.SelectStmt:
originalLen := b.handleHelper.stackTail
p, err = b.buildSelect(ctx, y)
if err == nil {
b.handleHelper.popMap()
} else {
// Be careful with this tricky case. One error is expected here when building the first recursive
// part, however, the b.handleHelper won't be restored if error occurs, which means there could be
// more than one entry pushed into b.handleHelper without being poped.
// For example: with recursive cte1 as (select ... union all select ... from tbl join cte1 ...) ...
// This violates the semantic of buildSelect(), which should only push exactly one entry into
// b.handleHelper. So we use a special logic to restore the b.handleHelper here.
for b.handleHelper.stackTail > originalLen {
b.handleHelper.popMap()
}
}
afterOpr = y.AfterSetOperator
case *ast.SetOprSelectList:
originalLen := b.handleHelper.stackTail
p, err = b.buildSetOpr(ctx, &ast.SetOprStmt{SelectList: y, With: y.With})
if err == nil {
b.handleHelper.popMap()
} else {
// ditto. See comments above.
for b.handleHelper.stackTail > originalLen {
b.handleHelper.popMap()
}
}
afterOpr = y.AfterSetOperator
}

Expand Down Expand Up @@ -7000,14 +7030,11 @@ func (b *PlanBuilder) buildRecursiveCTE(ctx context.Context, cte ast.ResultSetNo
// Build seed part plan.
saveSelect := x.SelectList.Selects
x.SelectList.Selects = x.SelectList.Selects[:i]
// We're rebuilding the seed part, so we pop the result we built previously.
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can I ask why we need to remove this?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The popMap() here is moved right after b.buildSelect() and b.buildSetOpr() in this function.
This should make it more clear and explicit that we directly drop the entries pushed by them.

for _i := 0; _i < i; _i++ {
b.handleHelper.popMap()
}
p, err = b.buildSetOpr(ctx, x)
if err != nil {
return err
}
b.handleHelper.popMap()
x.SelectList.Selects = saveSelect
p, err = b.adjustCTEPlanOutputName(p, cInfo.def)
if err != nil {
Expand Down Expand Up @@ -7076,6 +7103,7 @@ func (b *PlanBuilder) buildRecursiveCTE(ctx context.Context, cte ast.ResultSetNo
limit.SetChildren(limit.Children()[:0]...)
cInfo.limitLP = limit
}
b.handleHelper.pushMap(nil)
return nil
default:
p, err := b.buildResultSetNode(ctx, x, true)
Expand Down Expand Up @@ -7171,7 +7199,9 @@ func (b *PlanBuilder) buildWith(ctx context.Context, w *ast.WithClause) ([]*cteI
b.outerCTEs[len(b.outerCTEs)-1].optFlag = b.optFlag
b.outerCTEs[len(b.outerCTEs)-1].isBuilding = false
b.optFlag = saveFlag
// each cte (select statement) will generate a handle map, pop it out here.
// buildCte() will push one entry into handleHelper. As said in comments for b.handleHelper, building CTE
// should not affect the handleColHelper, so we pop it out here, then buildWith() as a whole will not modify
// the handleColHelper.
b.handleHelper.popMap()
ctes = append(ctes, b.outerCTEs[len(b.outerCTEs)-1])
}
Expand Down
3 changes: 2 additions & 1 deletion pkg/planner/core/planbuilder.go
Original file line number Diff line number Diff line change
Expand Up @@ -234,7 +234,8 @@ type PlanBuilder struct {
// If it's a aggregation, we pop the map and push a nil map since no handle information left.
// If it's a union, we pop all children's and push a nil map.
// If it's a join, we pop its children's out then merge them and push the new map to stack.
// If we meet a subquery, it's clearly that it's a independent problem so we just pop one map out when we finish building the subquery.
// If we meet a subquery or CTE, it's clearly that it's an independent problem so we just pop one map out when we
// finish building the subquery or CTE.
handleHelper *handleColHelper

hintProcessor *hint.QBHintHandler
Expand Down
64 changes: 64 additions & 0 deletions tests/integrationtest/r/cte.result
Original file line number Diff line number Diff line change
Expand Up @@ -1271,3 +1271,67 @@ FROM product_detail
col_4
Product A
Product A
drop table if exists t1, t2;
create table t1(a int, b int);
create table t2(a int, b int);
insert into t1 value(5,5);
insert into t2 value(1,1);
with recursive cte1 as (select 1 as a union all select cte1.a+1 from t1 join cte1 on t1.a > cte1.a) select * from cte1;
a
1
2
3
4
5
update t2 set b=2 where a in (with recursive cte1 as (select 1 as a union all select cte1.a+1 from t1 join cte1 on t1.a > cte1.a) select * from cte1);
select * from t2;
a b
1 2
delete from t2 where a in (with recursive cte1 as (select 1 as a union all select cte1.a+1 from t1 join cte1 on t1.a > cte1.a) select * from cte1);
select * from t2;
a b
drop table if exists table_abc1;
drop table if exists table_abc2;
drop table if exists table_abc3;
drop table if exists table_abc4;
CREATE TABLE `table_abc1` (
`column_abc1` varchar(10) DEFAULT NULL,
`column_abc2` varchar(10) DEFAULT NULL,
`column_abc3` varchar(10) DEFAULT NULL
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_bin;
CREATE TABLE `table_abc3` (
`column_abc5` varchar(10) DEFAULT NULL,
`column_abc6` varchar(10) DEFAULT NULL
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_bin;
CREATE TABLE `table_abc4` (
`column_abc3` varchar(10) DEFAULT NULL,
`column_abc7` varchar(10) DEFAULT NULL,
`column_abc5` varchar(10) DEFAULT NULL
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_bin;
INSERT INTO `table_abc1` VALUES ('KTL157','KTL157','KTL157');
INSERT INTO `table_abc3` VALUES ('1000','20240819');
INSERT INTO `table_abc4` VALUES ('KTL157','test','1000');
DELETE FROM table_abc3 t_abc3
WHERE t_abc3.column_abc5 IN (
SELECT a.column_abc5
FROM (
WITH tree_cte1 AS (
WITH RECURSIVE tree_cte AS (
SELECT t.column_abc1, t.column_abc2, t.column_abc3, 0 AS lv
FROM table_abc1 t
WHERE t.column_abc1 IN ('KTL157', 'KTL159')
UNION ALL
SELECT t.column_abc1, t.column_abc2, t.column_abc3, tcte.lv + 1
FROM table_abc1 t
JOIN tree_cte tcte ON t.column_abc1 = tcte.column_abc2
WHERE tcte.lv <= 1
)
SELECT * FROM tree_cte
)
SELECT e.column_abc5
FROM (
SELECT DISTINCT * FROM tree_cte1
) aa
LEFT JOIN table_abc4 e ON e.column_abc3 = aa.column_abc3
) a
);
67 changes: 67 additions & 0 deletions tests/integrationtest/t/cte.test
Original file line number Diff line number Diff line change
Expand Up @@ -680,3 +680,70 @@ SELECT col_4
FROM product_detail
) a;

# Tests for PR #55732
drop table if exists t1, t2;
create table t1(a int, b int);
create table t2(a int, b int);
insert into t1 value(5,5);
insert into t2 value(1,1);
with recursive cte1 as (select 1 as a union all select cte1.a+1 from t1 join cte1 on t1.a > cte1.a) select * from cte1;
# This UPDATE should update t2.b to 2
update t2 set b=2 where a in (with recursive cte1 as (select 1 as a union all select cte1.a+1 from t1 join cte1 on t1.a > cte1.a) select * from cte1);
select * from t2;
# This DELETE should delete all rows in t2
delete from t2 where a in (with recursive cte1 as (select 1 as a union all select cte1.a+1 from t1 join cte1 on t1.a > cte1.a) select * from cte1);
select * from t2;

# Issue #55666
drop table if exists table_abc1;
drop table if exists table_abc2;
drop table if exists table_abc3;
drop table if exists table_abc4;

CREATE TABLE `table_abc1` (
`column_abc1` varchar(10) DEFAULT NULL,
`column_abc2` varchar(10) DEFAULT NULL,
`column_abc3` varchar(10) DEFAULT NULL
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_bin;

CREATE TABLE `table_abc3` (
`column_abc5` varchar(10) DEFAULT NULL,
`column_abc6` varchar(10) DEFAULT NULL
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_bin;

CREATE TABLE `table_abc4` (
`column_abc3` varchar(10) DEFAULT NULL,
`column_abc7` varchar(10) DEFAULT NULL,
`column_abc5` varchar(10) DEFAULT NULL
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_bin;

INSERT INTO `table_abc1` VALUES ('KTL157','KTL157','KTL157');
INSERT INTO `table_abc3` VALUES ('1000','20240819');
INSERT INTO `table_abc4` VALUES ('KTL157','test','1000');

DELETE FROM table_abc3 t_abc3
WHERE t_abc3.column_abc5 IN (
SELECT a.column_abc5
FROM (
WITH tree_cte1 AS (
WITH RECURSIVE tree_cte AS (
SELECT t.column_abc1, t.column_abc2, t.column_abc3, 0 AS lv
FROM table_abc1 t
WHERE t.column_abc1 IN ('KTL157', 'KTL159')
UNION ALL
SELECT t.column_abc1, t.column_abc2, t.column_abc3, tcte.lv + 1
FROM table_abc1 t
JOIN tree_cte tcte ON t.column_abc1 = tcte.column_abc2
WHERE tcte.lv <= 1
)
SELECT * FROM tree_cte
)
SELECT e.column_abc5
FROM (
SELECT DISTINCT * FROM tree_cte1
) aa
LEFT JOIN table_abc4 e ON e.column_abc3 = aa.column_abc3
) a
);