
Commit 29a0c70

Address comment.
1 parent 4b0773a commit 29a0c70

4 files changed, +51 -27 lines changed

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala

Lines changed: 45 additions & 13 deletions
@@ -103,7 +103,7 @@ abstract class Optimizer(sessionCatalog: SessionCatalog, conf: CatalystConf)
       SimplifyCaseConversionExpressions,
       RewriteCorrelatedScalarSubquery,
       EliminateSerialization,
-      RemoveExtraProjectForSerialization) ::
+      RemoveAliasOnlyProject) ::
     Batch("Decimal Optimizations", fixedPoint,
       DecimalAggregates) ::
     Batch("Typed Filter Optimization", fixedPoint,
@@ -157,21 +157,52 @@ object SamplePushDown extends Rule[LogicalPlan] {
 }

 /**
- * Removes extra Project added in EliminateSerialization rule.
+ * Removes the Project only conducting Alias of its child node.
+ * It is created mainly for removing extra Project added in EliminateSerialization rule,
+ * but can also benefit other operators.
  */
-object RemoveExtraProjectForSerialization extends Rule[LogicalPlan] {
+object RemoveAliasOnlyProject extends Rule[LogicalPlan] {
+  // Check if projectList in the Project node has the same attribute names and ordering
+  // as its child node.
+  private def checkAliasOnly(
+      projectList: Seq[NamedExpression],
+      childOutput: Seq[Attribute]): Boolean = {
+    if (!projectList.forall(_.isInstanceOf[Alias]) || projectList.length != childOutput.length) {
+      return false
+    } else {
+      projectList.map(_.asInstanceOf[Alias]).zip(childOutput).forall { case (a, o) =>
+        a.child match {
+          case attr: Attribute
+              if a.name == attr.name && attr.name == o.name && attr.dataType == o.dataType
+                && attr.exprId == o.exprId =>
+            true
+          case _ => false
+        }
+      }
+    }
+  }
+
   def apply(plan: LogicalPlan): LogicalPlan = {
-    val objectProject = plan.find(_.isInstanceOf[ObjectProject]).map { case o: ObjectProject =>
-      val replaceFrom = o.outputObjAttr
-      val replaceTo = o.child.output.head
+    val processedPlan = plan.find { p =>
+      p match {
+        case Project(pList, child) if checkAliasOnly(pList, child.output) => true
+        case _ => false
+      }
+    }.map { case p: Project =>
+      val attrMap = p.projectList.map { a =>
+        val alias = a.asInstanceOf[Alias]
+        val replaceFrom = alias.toAttribute
+        val replaceTo = alias.child.asInstanceOf[Attribute]
+        (replaceFrom, replaceTo)
+      }.toMap
       plan.transformAllExpressions {
-        case a: Attribute if a.equals(replaceFrom) => replaceTo
+        case a: Attribute if attrMap.contains(a) => attrMap(a)
       }.transform {
-        case op: ObjectProject if o == op => op.child
+        case op: Project if op == p => op.child
       }
     }
-    if (objectProject.isDefined) {
-      objectProject.get
+    if (processedPlan.isDefined) {
+      processedPlan.get
     } else {
       plan
     }
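For intuition, here is a minimal sketch of the plan shape the new rule collapses, written in the Catalyst test DSL (the relation and column names are invented for illustration):

    import org.apache.spark.sql.catalyst.dsl.expressions._
    import org.apache.spark.sql.catalyst.dsl.plans._
    import org.apache.spark.sql.catalyst.plans.logical.LocalRelation

    // A Project whose only work is re-aliasing every child attribute under the
    // same name, type, and ordering: exactly what checkAliasOnly accepts.
    val relation = LocalRelation('a.int, 'b.string)
    val aliasOnly = relation.select('a.as("a"), 'b.as("b")).analyze
    // RemoveAliasOnlyProject rewrites references to the alias attributes back
    // to the originals via attrMap, then replaces the Project with its child,
    // leaving just `relation`.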
@@ -186,9 +217,10 @@ object EliminateSerialization extends Rule[LogicalPlan] {
   def apply(plan: LogicalPlan): LogicalPlan = plan transform {
     case d @ DeserializeToObject(_, _, s: SerializeFromObject)
         if d.outputObjectType == s.inputObjectType =>
-      // Adds an extra ObjectProject here, to preserve the output expr id of `DeserializeToObject`.
-      // We will remove it later.
-      ObjectProject(d.output.head, s.child)
+      // Adds an extra Project here, to preserve the output expr id of `DeserializeToObject`.
+      // We will remove it later in RemoveAliasOnlyProject rule.
+      val objAttr = Alias(s.child.output.head, "obj")(exprId = d.output.head.exprId)
+      Project(objAttr :: Nil, s.child)
     case a @ AppendColumns(_, _, _, s: SerializeFromObject)
         if a.deserializer.dataType == s.inputObjectType =>
       AppendColumnsWithObject(a.func, s.serializer, a.serializer, s.child)
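Concretely, mirroring the updated EliminateSerializationSuite test further down (and assuming that suite's implicit tuple encoder is in scope), the rewritten case turns a back-to-back serialize/deserialize pair into an alias-only Project over the original object:

    val input = LocalRelation('obj.obj(classOf[(Int, Int)]))
    val plan = input.serialize[(Int, Int)].deserialize[(Int, Int)].analyze
    // EliminateSerialization reduces this to the equivalent of
    // input.select('obj.as("obj")): the serialize/deserialize pair disappears,
    // and the Alias pins the exprId that DeserializeToObject's output carried,
    // so parent expressions still resolve. RemoveAliasOnlyProject then drops
    // the leftover Project in the same batch.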

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/object.scala

Lines changed: 0 additions & 9 deletions
@@ -80,15 +80,6 @@ trait ObjectConsumer extends UnaryNode {
   def inputObjectType: DataType = child.output.head.dataType
 }

-/**
- * Takes the object from child and projects it as new attribute.
- * This logical plan is just used to preserve expr id temporarily and will be removed before
- * the end of optimization phase.
- */
-case class ObjectProject(
-    outputObjAttr: Attribute,
-    child: LogicalPlan) extends UnaryNode with ObjectProducer
-
 /**
  * Takes the input row from child and turns it into object using the given deserializer expression.
  */

sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/EliminateSerializationSuite.scala

Lines changed: 1 addition & 1 deletion
@@ -42,7 +42,7 @@ class EliminateSerializationSuite extends PlanTest {
     val input = LocalRelation('obj.obj(classOf[(Int, Int)]))
     val plan = input.serialize[(Int, Int)].deserialize[(Int, Int)].analyze
     val optimized = Optimize.execute(plan)
-    val expected = ObjectProject(input.output.head.withNullability(false), input)
+    val expected = input.select('obj.as("obj")).analyze
     comparePlans(optimized, expected)
   }


sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/TypedFilterOptimizationSuite.scala

Lines changed: 5 additions & 4 deletions
@@ -24,7 +24,7 @@ import org.apache.spark.sql.catalyst.dsl.expressions._
 import org.apache.spark.sql.catalyst.dsl.plans._
 import org.apache.spark.sql.catalyst.encoders.{encoderFor, ExpressionEncoder}
 import org.apache.spark.sql.catalyst.plans.PlanTest
-import org.apache.spark.sql.catalyst.plans.logical.{LocalRelation, LogicalPlan, ObjectProject}
+import org.apache.spark.sql.catalyst.plans.logical.{LocalRelation, LogicalPlan}
 import org.apache.spark.sql.catalyst.rules.RuleExecutor
 import org.apache.spark.sql.types.BooleanType

@@ -47,9 +47,10 @@ class TypedFilterOptimizationSuite extends PlanTest {
     val query = input.filter(f1).filter(f2).analyze

     val optimized = Optimize.execute(query)
-    val deserialized = input.deserialize[(Int, Int)]
-    val expected = ObjectProject(deserialized.output.head, deserialized
-      .where(callFunction(f1, BooleanType, 'obj)))
+
+    val expected = input.deserialize[(Int, Int)]
+      .where(callFunction(f1, BooleanType, 'obj))
+      .select('obj.as("obj"))
       .where(callFunction(f2, BooleanType, 'obj))
       .serialize[(Int, Int)].analyze

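Note that the expected plan still contains the alias-only .select('obj.as("obj")), presumably because this suite's local Optimize batch runs EliminateSerialization without RemoveAliasOnlyProject. Under the full optimizer batch shown at the top of this commit, the leftover Project should collapse as well; hypothetically:

    // Hypothetical (not part of the patch): a batch that also runs
    // RemoveAliasOnlyProject should reduce `expected` to the two filters fused
    // directly over the deserialized object:
    //   input.deserialize[(Int, Int)]
    //     .where(callFunction(f1, BooleanType, 'obj))
    //     .where(callFunction(f2, BooleanType, 'obj))
    //     .serialize[(Int, Int)]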