Fix test.
viirya committed Oct 24, 2016
1 parent 3d24f79 commit 76a3eaf
Showing 5 changed files with 56 additions and 9 deletions.
@@ -42,7 +42,12 @@ public boolean hasNext() throws IOException {
     if (!shouldStop()) {
       processNext();
     }
-    return !currentRows.isEmpty();
+    boolean hasNext = !currentRows.isEmpty();
+    // If no more data available, releases resource if necessary.
+    if (!hasNext) {
+      releaseResource();
+    }
+    return hasNext;
   }
 
   public InternalRow next() {
@@ -91,4 +96,9 @@ protected void incPeakExecutionMemory(long size) {
    * After it's called, if currentRow is still null, it means no more rows left.
    */
   protected abstract void processNext() throws IOException;
+
+  /**
+   * Releases resources if necessary. No-op in default.
+   */
+  protected void releaseResource() {}
 }
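
Note: the hasNext() change above makes the iterator release its resources as soon as it knows the input is exhausted, with releaseResource() added as an overridable no-op hook. A minimal Scala sketch of the same pattern follows; BufferedIterator, fetchNext and the Option-based buffering are illustrative choices, not code from this commit.

// Minimal sketch of the eager-release pattern (illustrative names only).
abstract class BufferedIterator[T] {
  private var current: Option[T] = None
  private var exhausted = false

  protected def fetchNext(): Option[T]        // None once the input is drained
  protected def releaseResource(): Unit = {}  // no-op by default, as in the diff

  def hasNext: Boolean = {
    if (current.isEmpty && !exhausted) {
      current = fetchNext()
      if (current.isEmpty) {
        exhausted = true
        // Release as soon as exhaustion is detected, instead of waiting for
        // the task to finish or for the object to be garbage collected.
        releaseResource()
      }
    }
    current.nonEmpty
  }

  def next(): T = {
    if (!hasNext) throw new NoSuchElementException("end of input")
    val row = current.get
    current = None
    row
  }
}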
@@ -20,6 +20,7 @@ package org.apache.spark.sql.execution
 import org.apache.spark.rdd.RDD
 import org.apache.spark.sql.catalyst.InternalRow
 import org.apache.spark.sql.catalyst.expressions.{Attribute, UnsafeProjection}
+import org.apache.spark.sql.catalyst.expressions.codegen.CodegenContext
 import org.apache.spark.sql.execution.metric.SQLMetrics
 
 
@@ -28,11 +29,13 @@ import org.apache.spark.sql.execution.metric.SQLMetrics
  */
 case class LocalTableScanExec(
     output: Seq[Attribute],
-    rows: Seq[InternalRow]) extends LeafExecNode {
+    rows: Seq[InternalRow]) extends LeafExecNode with CodegenSupport {
 
   override lazy val metrics = Map(
     "numOutputRows" -> SQLMetrics.createMetric(sparkContext, "number of output rows"))
 
+  override def inputRDDs(): Seq[RDD[InternalRow]] = Seq(rdd)
+
   private val unsafeRows: Array[InternalRow] = {
     if (rows.isEmpty) {
       Array.empty
@@ -47,6 +50,22 @@ case class LocalTableScanExec(
 
   private lazy val rdd = sqlContext.sparkContext.parallelize(unsafeRows, numParallelism)
 
+  protected override def doProduce(ctx: CodegenContext): String = {
+    val numOutput = metricTerm(ctx, "numOutputRows")
+    val input = ctx.freshName("input")
+    // Right now, LocalTableScanExec is only used when there is one upstream.
+    ctx.addMutableState("scala.collection.Iterator", input, s"$input = inputs[0];")
+    val row = ctx.freshName("row")
+    s"""
+       | while ($input.hasNext()) {
+       |   InternalRow $row = (InternalRow) $input.next();
+       |   $numOutput.add(1);
+       |   ${consume(ctx, null, row).trim}
+       |   if (shouldStop()) return;
+       | }
+     """.stripMargin
+  }
+
   protected override def doExecute(): RDD[InternalRow] = {
     val numOutputRows = longMetric("numOutputRows")
     rdd.map { r =>
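
Implementing CodegenSupport lets the local-table scan feed rows into a whole-stage-codegen loop: the generated code pulls from inputs[0], bumps numOutputRows, and hands each row to consume(). A hedged way to look at the effect from a spark-shell follows; it assumes a SparkSession named spark, and whether the scan actually ends up inside a WholeStageCodegenExec subtree depends on the surrounding plan and Spark version. WholeStageCodegenExec and debugCodegen() are existing Spark 2.x APIs.

import org.apache.spark.sql.execution.WholeStageCodegenExec
import org.apache.spark.sql.execution.debug._  // adds debugCodegen()
import spark.implicits._

val df = Seq((1, "a"), (2, "b")).toDF("id", "name").filter($"id" > 1)

// Count whole-stage-codegen subtrees in the physical plan.
val codegenNodes = df.queryExecution.executedPlan.collect {
  case w: WholeStageCodegenExec => w
}
println(s"whole-stage codegen subtrees: ${codegenNodes.size}")

// Dump the generated Java source for inspection.
df.debugCodegen()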
@@ -661,6 +661,15 @@ case class HashAggregateExec(
       """.stripMargin
     }
 
+    ctx.addNewFunction("releaseResource", s"""
+      @Override
+      protected void releaseResource() {
+        $iterTerm.close();
+        if ($sorterTerm == null) {
+          $hashMapTerm.free();
+        }
+      }
+    """)
 
     val aggTime = metricTerm(ctx, "aggTime")
     val beforeAgg = ctx.freshName("beforeAgg")
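
In HashAggregateExec, the generated releaseResource() closes the aggregation iterator and frees the hash map only when no sorter was created ($sorterTerm == null), presumably because a spill hands the map's memory over to the sorter. A toy Scala sketch of that rule; FakeHashMap, FakeSorter and FakeAggIter are made-up stand-ins, not Spark classes.

// Illustrative only: close the iterator unconditionally, free the map only
// when no sorter exists (assumption: a sorter owns the map's memory after a spill).
class FakeHashMap { def free(): Unit = println("hash map freed") }
class FakeSorter
class FakeAggIter(map: FakeHashMap, sorter: Option[FakeSorter]) {
  def close(): Unit = println("iterator closed")
  def releaseResource(): Unit = {
    close()
    if (sorter.isEmpty) map.free()
  }
}

new FakeAggIter(new FakeHashMap, sorter = None).releaseResource()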
@@ -37,9 +37,10 @@ case class CollectLimitExec(limit: Int, child: SparkPlan) extends UnaryExecNode
   override def output: Seq[Attribute] = child.output
   override def outputPartitioning: Partitioning = SinglePartition
   override def requiredChildDistribution: List[Distribution] = AllTuples :: Nil
-  override def executeCollect(): Array[InternalRow] = child match {
-    case e: Exchange => e.child.executeTake(limit)
-    case _ => child.executeTake(limit)
+  override def executeCollect(): Array[InternalRow] = {
+    child.collect {
+      case l: LocalLimitExec => l
+    }.head.child.executeTake(limit)
   }
 
   protected override def doExecute(): RDD[InternalRow] = {
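
CollectLimitExec.executeCollect() previously peeled off at most one Exchange before calling executeTake; it now searches the whole child plan for the LocalLimitExec node and takes `limit` rows from below it. To see where the local and global limit operators sit for a given query, the physical plan can simply be printed (assumes a SparkSession named spark):

// Look for LocalLimit / GlobalLimit / CollectLimit operators in the output.
spark.range(1, 100, 1, numPartitions = 10).limit(1).explain()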
sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala (12 additions, 4 deletions)
@@ -27,6 +27,7 @@ import org.apache.spark.sql.catalyst.expressions.SortOrder
 import org.apache.spark.sql.catalyst.plans.logical.Aggregate
 import org.apache.spark.sql.catalyst.util.StringUtils
 import org.apache.spark.sql.execution.aggregate
+import org.apache.spark.sql.execution.LocalLimitExec
 import org.apache.spark.sql.execution.joins.{BroadcastHashJoinExec, CartesianProductExec, SortMergeJoinExec}
 import org.apache.spark.sql.functions._
 import org.apache.spark.sql.internal.SQLConf
@@ -2684,11 +2685,18 @@ class SQLQuerySuite extends QueryTest with SharedSQLContext {
   }
 
   test("SPARK-17515: CollectLimit.execute() should perform per-partition limits") {
+    val df = spark.range(1, 100, 1, numPartitions = 10).limit(1)
+    val localLimit = df.queryExecution.executedPlan.collect {
+      case l: LocalLimitExec => l
+    }
+    assert(localLimit.nonEmpty)
     val numRecordsRead = spark.sparkContext.longAccumulator
-    spark.range(1, 100, 1, numPartitions = 10).map { x =>
-      numRecordsRead.add(1)
-      x
-    }.limit(1).queryExecution.toRdd.count()
+    localLimit.head.execute().mapPartitionsInternal { iter =>
+      iter.map { x =>
+        numRecordsRead.add(1)
+        x
+      }
+    }.count
     assert(numRecordsRead.value === 10)
   }
 
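The rewritten test runs the LocalLimitExec operator directly and counts the rows it emits: with 10 partitions and limit(1), each partition should contribute at most one row, so the accumulator ends at 10. A rough standalone equivalent follows, using the public mapPartitions instead of the internal mapPartitionsInternal; it assumes a SparkSession named spark, and the plan containing a LocalLimitExec depends on the Spark version under test.

import org.apache.spark.sql.execution.LocalLimitExec

val numRecordsRead = spark.sparkContext.longAccumulator("rowsPastLocalLimit")
val plan = spark.range(1, 100, 1, numPartitions = 10).limit(1).queryExecution.executedPlan

// Pull the per-partition limit out of the plan and execute it directly.
val localLimit = plan.collect { case l: LocalLimitExec => l }.head
localLimit.execute().mapPartitions { iter =>
  iter.map { row => numRecordsRead.add(1); row }
}.count()

println(numRecordsRead.value)  // expected: 10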
