diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala
index 7455e68ee8f64..27ea0728c00b3 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala
@@ -381,6 +381,14 @@ object ColumnPruning extends Rule[LogicalPlan] {
         p
       }
 
+    // Drop an intermediate Project over OneRowRelation when the outer Project reads none of
+    // its output. Such a child always yields exactly one row, so the swap keeps the row count.
+    // An arbitrary child must NOT be swapped: `SELECT 1 FROM t` has to return one row per row
+    // of `t` (and no rows at all when `t` is empty), not a single row.
+    case p @ Project(projectList, child @ Project(_, OneRowRelation))
+        if p.references.intersect(child.outputSet).isEmpty =>
+      Project(projectList, OneRowRelation)
+
     // Can't prune the columns on LeafNode
     case p @ Project(_, l: LeafNode) =>
       p
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/ColumnPruningSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/ColumnPruningSuite.scala
index d09601e0343d7..5fdcf0c830e8c 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/ColumnPruningSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/ColumnPruningSuite.scala
@@ -284,5 +284,25 @@ class ColumnPruningSuite extends PlanTest {
     comparePlans(Optimize.execute(plan1.analyze), correctAnswer1)
   }
 
+  test("Eliminate the Project with no references to its child") {
+    val expected = Project(Literal(1).as("1") :: Nil, OneRowRelation).analyze
+
+    // The child is a Project over OneRowRelation, so it always yields exactly one row.
+    val query1 = Project(
+      Literal(1).as("1") :: Nil,
+      Project(Literal(1).as("1") :: Nil, OneRowRelation)).analyze
+    comparePlans(Optimize.execute(query1), expected)
+
+    val query2 =
+      Project(Literal(1).as("1") :: Nil, Project(Nil, OneRowRelation)).analyze
+    comparePlans(Optimize.execute(query2), expected)
+  }
+
+  test("Keep the child of a Project when it decides the number of rows") {
+    val input = LocalRelation('key.int, 'value.string)
+    val query = Project(Literal(1).as("1") :: Nil, input).analyze
+    comparePlans(Optimize.execute(query), query)
+  }
+
   // todo: add more tests for column pruning
 }