-
Notifications
You must be signed in to change notification settings - Fork 29k
[SPARK-26390][SQL] ColumnPruning rule should only do column pruning #23343
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -93,7 +93,7 @@ abstract class Optimizer(sessionCatalog: SessionCatalog) | |
| RewriteCorrelatedScalarSubquery, | ||
| EliminateSerialization, | ||
| RemoveRedundantAliases, | ||
| RemoveRedundantProject, | ||
| RemoveNoopOperators, | ||
| SimplifyExtractValueOps, | ||
| CombineConcats) ++ | ||
| extendedOperatorOptimizationRules | ||
|
|
@@ -177,7 +177,7 @@ abstract class Optimizer(sessionCatalog: SessionCatalog) | |
| RewritePredicateSubquery, | ||
| ColumnPruning, | ||
| CollapseProject, | ||
| RemoveRedundantProject) :+ | ||
| RemoveNoopOperators) :+ | ||
| Batch("UpdateAttributeReferences", Once, | ||
| UpdateNullabilityInAttributeReferences) | ||
| } | ||
|
|
@@ -403,11 +403,15 @@ object RemoveRedundantAliases extends Rule[LogicalPlan] { | |
| } | ||
|
|
||
| /** | ||
| * Remove projections from the query plan that do not make any modifications. | ||
| * Remove no-op operators from the query plan that do not make any modifications. | ||
| */ | ||
| object RemoveRedundantProject extends Rule[LogicalPlan] { | ||
| object RemoveNoopOperators extends Rule[LogicalPlan] { | ||
| def apply(plan: LogicalPlan): LogicalPlan = plan transform { | ||
| case p @ Project(_, child) if p.output == child.output => child | ||
| // Eliminate no-op Projects | ||
| case p @ Project(_, child) if child.sameOutput(p) => child | ||
|
|
||
| // Eliminate no-op Window | ||
| case w: Window if w.windowExpressions.isEmpty => w.child | ||
| } | ||
| } | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @cloud-fan . Is this too small to move out as a separate file during this refactoring?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It's small and it has been here for a while. We can move many rules here to separated files in another PR. |
||
|
|
||
|
|
@@ -602,17 +606,12 @@ object ColumnPruning extends Rule[LogicalPlan] { | |
| p.copy(child = w.copy( | ||
| windowExpressions = w.windowExpressions.filter(p.references.contains))) | ||
|
|
||
| // Eliminate no-op Window | ||
| case w: Window if w.windowExpressions.isEmpty => w.child | ||
|
|
||
| // Eliminate no-op Projects | ||
| case p @ Project(_, child) if child.sameOutput(p) => child | ||
|
|
||
| // Can't prune the columns on LeafNode | ||
| case p @ Project(_, _: LeafNode) => p | ||
|
|
||
| // for all other logical plans that inherits the output from it's children | ||
| case p @ Project(_, child) => | ||
| // Project over project is handled by the first case, skip it here. | ||
| case p @ Project(_, child) if !child.isInstanceOf[Project] => | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. in case the child is a project, shall we anyway update it with
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We already handled project over project at L542
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. yes, I see, makes sense, thanks.
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This requires a code comment. I believe the others will ask the same q when we read the problem again. |
||
| val required = child.references ++ p.references | ||
| if (!child.inputSet.subsetOf(required)) { | ||
| val newChildren = child.children.map(c => prunedChild(c, required)) | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -34,6 +34,7 @@ class ColumnPruningSuite extends PlanTest { | |
| val batches = Batch("Column pruning", FixedPoint(100), | ||
| PushDownPredicate, | ||
| ColumnPruning, | ||
| RemoveNoopOperators, | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Is this added to remove top
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. without this, a lot more tests need to be updated...
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. ah, I see. :) |
||
| CollapseProject) :: Nil | ||
| } | ||
|
|
||
|
|
@@ -340,10 +341,8 @@ class ColumnPruningSuite extends PlanTest { | |
| test("Column pruning on Union") { | ||
| val input1 = LocalRelation('a.int, 'b.string, 'c.double) | ||
| val input2 = LocalRelation('c.int, 'd.string, 'e.double) | ||
| val query = Project('b :: Nil, | ||
| Union(input1 :: input2 :: Nil)).analyze | ||
| val expected = Project('b :: Nil, | ||
| Union(Project('b :: Nil, input1) :: Project('d :: Nil, input2) :: Nil)).analyze | ||
| val query = Project('b :: Nil, Union(input1 :: input2 :: Nil)).analyze | ||
| val expected = Union(Project('b :: Nil, input1) :: Project('d :: Nil, input2) :: Nil).analyze | ||
| comparePlans(Optimize.execute(query), expected) | ||
| } | ||
|
|
||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -27,8 +27,9 @@ class CombiningLimitsSuite extends PlanTest { | |
|
|
||
| object Optimize extends RuleExecutor[LogicalPlan] { | ||
| val batches = | ||
| Batch("Filter Pushdown", FixedPoint(100), | ||
| ColumnPruning) :: | ||
| Batch("Column Pruning", FixedPoint(100), | ||
| ColumnPruning, | ||
| RemoveNoopOperators) :: | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. nit: Looks not precise to have |
||
| Batch("Combine Limit", FixedPoint(10), | ||
| CombineLimits) :: | ||
| Batch("Constant Folding", FixedPoint(10), | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
nit:
RemoveUselessOperators?There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think Noop is fine too. It's no-op, right :-)?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
yes it is fine too, I preferred
Uselessbecause they are actually doing something, so they introduce a useless overhead, anyway not a big deal