Commit c02324c
Use requiredChildDistribution in Limit
1 parent 089f9f5 commit c02324c

2 files changed: +19 −10 lines changed

sql/core/src/main/scala/org/apache/spark/sql/execution/SparkStrategies.scala

Lines changed: 3 additions & 5 deletions

@@ -346,12 +346,10 @@ private[sql] abstract class SparkStrategies extends QueryPlanner[SparkPlan] {
       execution.Sample(lb, ub, withReplacement, seed, planLater(child)) :: Nil
     case logical.LocalRelation(output, data) =>
       LocalTableScan(output, data) :: Nil
-    case logical.Limit(IntegerLiteral(limit), child) => {
-      val perPartitionLimit = execution.PartitionLocalLimit(limit, planLater(child))
-      val globalLimit = execution.PartitionLocalLimit(
-        limit, execution.Exchange(SinglePartition, Nil, perPartitionLimit))
+    case logical.Limit(IntegerLiteral(limit), child) =>
+      val perPartitionLimit = execution.Limit(global = false, limit, planLater(child))
+      val globalLimit = execution.Limit(global = true, limit, perPartitionLimit)
       globalLimit :: Nil
-    }
     case Unions(unionChildren) =>
       execution.Union(unionChildren.map(planLater)) :: Nil
     case logical.Except(left, right) =>
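The old strategy built the single-partition Exchange by hand; the new one simply stacks a global Limit on top of a per-partition Limit and lets the operator's requiredChildDistribution pull the exchange in during preparation. A minimal, self-contained sketch of that idea follows; Plan, Scan, Exchange, and ensureDistribution are illustrative stand-ins, not Spark's actual classes or its EnsureRequirements rule.

    // Toy model of how a distribution requirement drives exchange insertion.
    // All names here are hypothetical, chosen for the sketch only.
    sealed trait Distribution
    case object UnspecifiedDistribution extends Distribution
    case object AllTuples extends Distribution

    sealed trait Plan { def children: Seq[Plan] }
    case class Scan(name: String) extends Plan { def children: Seq[Plan] = Nil }
    case class Exchange(child: Plan) extends Plan { def children: Seq[Plan] = Seq(child) }
    case class Limit(global: Boolean, limit: Int, child: Plan) extends Plan {
      def children: Seq[Plan] = Seq(child)
      // Mirrors the commit: a global limit demands all tuples in one place.
      def requiredChildDistribution: Seq[Distribution] =
        if (global) AllTuples :: Nil else UnspecifiedDistribution :: Nil
    }

    // Walk the tree; wrap a child in Exchange when its parent demands AllTuples.
    def ensureDistribution(plan: Plan): Plan = plan match {
      case l @ Limit(g, n, child) =>
        val prepared = ensureDistribution(child)
        val satisfied =
          if (l.requiredChildDistribution.head == AllTuples) Exchange(prepared)
          else prepared
        Limit(g, n, satisfied)
      case other => other
    }

    // The strategy above emits Limit(global = true, ...) directly over
    // Limit(global = false, ...); the exchange appears between them later.
    val planned = Limit(global = true, 10, Limit(global = false, 10, Scan("t")))
    println(ensureDistribution(planned))
    // Limit(true,10,Exchange(Limit(false,10,Scan(t))))

This keeps the strategy declarative: it states what the plan shape is, and the shared preparation machinery decides where data movement is needed.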

sql/core/src/main/scala/org/apache/spark/sql/execution/basicOperators.scala

Lines changed: 16 additions & 5 deletions

@@ -25,11 +25,9 @@ import org.apache.spark.sql.catalyst.errors._
 import org.apache.spark.sql.catalyst.expressions._
 import org.apache.spark.sql.catalyst.plans.physical._
 import org.apache.spark.sql.catalyst.{CatalystTypeConverters, InternalRow}
-import org.apache.spark.util.CompletionIterator
 import org.apache.spark.util.collection.ExternalSorter
 import org.apache.spark.util.collection.unsafe.sort.PrefixComparator
-import org.apache.spark.util.{CompletionIterator, MutablePair}
-import org.apache.spark.{HashPartitioner, SparkEnv}
+import org.apache.spark.util.CompletionIterator

 /**
  * :: DeveloperApi ::
@@ -109,11 +107,24 @@ case class Union(children: Seq[SparkPlan]) extends SparkPlan {

 /**
  * :: DeveloperApi ::
- * Take the first `limit` elements from each partition.
+ * Take the first `limit` elements.
+ *
+ * @param global if true, then this operator will take the first `limit` elements of the entire
+ *               input. If false, it will take the first `limit` elements of each partition.
+ * @param limit the number of elements to take.
+ * @param child the input data source.
  */
 @DeveloperApi
-case class PartitionLocalLimit(limit: Int, child: SparkPlan)
+case class Limit(global: Boolean, limit: Int, child: SparkPlan)
   extends UnaryNode {
+  override def requiredChildDistribution: List[Distribution] = {
+    if (global) {
+      AllTuples :: Nil
+    } else {
+      UnspecifiedDistribution :: Nil
+    }
+  }
+
   override def output: Seq[Attribute] = child.output

   override def executeCollect(): Array[Row] = child.executeTake(limit)
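The global/local split is easy to see on a toy partitioned dataset. A minimal sketch, with plain Scala collections standing in for RDD partitions (not Spark code):

    // Partitions modeled as plain Seqs; a local limit trims each partition,
    // a global limit trims the single coalesced partition.
    val partitions: Seq[Seq[Int]] = Seq(Seq(1, 2, 3), Seq(4, 5), Seq(6, 7, 8, 9))

    // Limit(global = false, 2, _): first 2 elements of every partition.
    val localLimited = partitions.map(_.take(2))       // Seq(Seq(1,2), Seq(4,5), Seq(6,7))

    // The AllTuples requirement forces an exchange into one partition...
    val singlePartition = Seq(localLimited.flatten)    // Seq(Seq(1,2,4,5,6,7))

    // ...so Limit(global = true, 2, _) yields exactly 2 rows overall.
    val globalLimited = singlePartition.map(_.take(2)) // Seq(Seq(1,2))

Running the local limit before the exchange bounds how much data moves: at most limit rows per input partition cross the shuffle, rather than every row.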
