apache
diff --git a/‎sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/interfaces.scala‎
Lines changed: 12 additions & 7 deletions b/‎sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/interfaces.scala‎
Lines changed: 12 additions & 7 deletions
diff --git a/‎sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/physical/partitioning.scala‎
Lines changed: 2 additions & 0 deletions b/‎sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/physical/partitioning.scala‎
Lines changed: 2 additions & 0 deletions
diff --git a/‎sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/Aggregate.scala‎
Lines changed: 18 additions & 8 deletions b/‎sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/Aggregate.scala‎
Lines changed: 18 additions & 8 deletions
diff --git a/‎sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/AggregationIterator.scala‎
Lines changed: 4 additions & 3 deletions b/‎sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/AggregationIterator.scala‎
Lines changed: 4 additions & 3 deletions
diff --git a/‎sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/SortBasedAggregationIterator.scala‎
Lines changed: 6 additions & 13 deletions b/‎sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/SortBasedAggregationIterator.scala‎
Lines changed: 6 additions & 13 deletions
@@ -110,7 +110,11 @@ abstract class AggregateFunction2
    * buffer value of `avg(x)` will be 0 and the position of the first buffer value of `avg(y)`
    * will be 2.
    */
-  var mutableBufferOffset: Int = 0
+  protected var mutableBufferOffset: Int = 0
+
+  def withNewMutableBufferOffset(newMutableBufferOffset: Int): Unit = {
+    mutableBufferOffset = newMutableBufferOffset
+  }
 
   /**
    * The offset of this function's start buffer value in the
@@ -126,7 +130,11 @@ abstract class AggregateFunction2
    * buffer value of `avg(x)` will be 1 and the position of the first buffer value of `avg(y)`
    * will be 3 (position 0 is used for the value of key`).
    */
-  var inputBufferOffset: Int = 0
+  protected var inputBufferOffset: Int = 0
+
+  def withNewInputBufferOffset(newInputBufferOffset: Int): Unit = {
+    inputBufferOffset = newInputBufferOffset
+  }
 
   /** The schema of the aggregation buffer. */
   def bufferSchema: StructType
@@ -195,11 +203,8 @@ abstract class AlgebraicAggregate extends AggregateFunction2 with Serializable w
   override def bufferSchema: StructType = StructType.fromAttributes(bufferAttributes)
 
   override def initialize(buffer: MutableRow): Unit = {
-    var i = 0
-    while (i < bufferAttributes.size) {
-      buffer(i + mutableBufferOffset) = initialValues(i).eval()
-      i += 1
-    }
+    throw new UnsupportedOperationException(
+      "AlgebraicAggregate's initialize should not be called directly")
   }
 
   override final def update(buffer: MutableRow, input: InternalRow): Unit = {
 
@@ -137,6 +137,8 @@ case class HashPartitioning(expressions: Seq[Expression], numPartitions: Int)
   }
 
   override def keyExpressions: Seq[Expression] = expressions
+
+  override def toString: String = s"${super.toString} numPartitions=$numPartitions"
 }
 
 /**
 
@@ -47,6 +47,9 @@ case class Aggregate(
   private[this] val allAggregateExpressions =
     nonCompleteAggregateExpressions ++ completeAggregateExpressions
 
+  private[this] val hasNonAlgebricAggregateFunctions =
+    !allAggregateExpressions.forall(_.aggregateFunction.isInstanceOf[AlgebraicAggregate])
+
   // Use the hybrid iterator if (1) unsafe is enabled, (2) the schemata of
   // grouping key and aggregation buffer is supported; and (3) all
   // aggregate functions are algebraic.
@@ -56,14 +59,13 @@ case class Aggregate(
         allAggregateExpressions.flatMap(_.aggregateFunction.bufferAttributes))
     val groupKeySchema: StructType =
       StructType.fromAttributes(groupingExpressions.map(_.toAttribute))
-    val resultSchema: StructType =
-      StructType.fromAttributes(resultExpressions.map(_.toAttribute))
+
     val schemaSupportsUnsafe: Boolean =
       UnsafeFixedWidthAggregationMap.supportsAggregationBufferSchema(aggregationBufferSchema) &&
-        UnsafeProjection.canSupport(groupKeySchema) &&
-        UnsafeProjection.canSupport(resultSchema)
+        UnsafeProjection.canSupport(groupKeySchema)
 
-    sqlContext.conf.unsafeEnabled && schemaSupportsUnsafe
+    // TODO: Use the hybrid iterator for non-algebraic aggregate functions.
+    sqlContext.conf.unsafeEnabled && schemaSupportsUnsafe && !hasNonAlgebricAggregateFunctions
   }
 
   private[this] val hybridAggregateEnabled = sqlContext.conf.useHybridAggregate
@@ -74,9 +76,17 @@ case class Aggregate(
     groupingExpressions.nonEmpty && (!supportsHybridIterator || !hybridAggregateEnabled)
   }
 
-  override def canProcessUnsafeRows: Boolean = false // true
+  override def canProcessUnsafeRows: Boolean = !hasNonAlgebricAggregateFunctions
 
-  override def outputsUnsafeRows: Boolean = supportsHybridIterator
+  // If the result expressions' data types are all fixed length, we generate unsafe rows.
+  // (We require this, instead of checking the result of UnsafeProjection.canSupport,
+  // because we use a mutable projection to generate the result.)
+  override def outputsUnsafeRows: Boolean = {
+    // resultExpressions.map(_.dataType).forall(UnsafeRow.isFixedLength)
+    // TODO: Support generating UnsafeRows. We can just re-enable the line above and fix
+    // any issues we get.
+    false
+  }
 
   override def output: Seq[Attribute] = resultExpressions.map(_.toAttribute)
 
@@ -131,7 +141,7 @@ case class Aggregate(
           newMutableProjection _,
           child.output,
           iter,
-          child.outputsUnsafeRows)
+          outputsUnsafeRows)
       } else {
         if (!hasInput && groupingExpressions.nonEmpty) {
           // This is a grouped aggregate and the input iterator is empty,
 
@@ -90,14 +90,14 @@ abstract class AggregationIterator(
         case _ =>
           // We only need to set inputBufferOffset for aggregate functions with mode
           // PartialMerge and Final.
-          func.inputBufferOffset = inputBufferOffset
+          func.withNewInputBufferOffset(inputBufferOffset)
           inputBufferOffset += func.bufferSchema.length
           func
       }
       // Set mutableBufferOffset for this function. It is important that setting
       // mutableBufferOffset happens after all potential bindReference operations
       // because bindReference will create a new instance of the function.
-      funcWithBoundReferences.mutableBufferOffset = mutableBufferOffset
+      funcWithBoundReferences.withNewMutableBufferOffset(mutableBufferOffset)
       mutableBufferOffset += funcWithBoundReferences.bufferSchema.length
       functions(i) = funcWithBoundReferences
       i += 1
@@ -347,7 +347,8 @@ abstract class AggregationIterator(
         }
         val algebraicEvalProjection = newMutableProjection(evalExpressions, bufferSchemata)()
         val aggregateResultSchema = nonCompleteAggregateAttributes ++ completeAggregateAttributes
-        val aggregateResult: MutableRow = new GenericMutableRow(aggregateResultSchema.length)
+        // TODO: Use unsafe row.
+        val aggregateResult = new GenericMutableRow(aggregateResultSchema.length)
         val resultProjection =
           newMutableProjection(
             resultExpressions, groupingKeyAttributes ++ aggregateResultSchema)()
 
@@ -59,7 +59,9 @@ class SortBasedAggregationIterator(
     val bufferRowSize: Int = bufferSchema.length
 
     val genericMutableBuffer = new GenericMutableRow(bufferRowSize)
-    val buffer = if (outputsUnsafeRows) {
+    val useUnsafeBuffer = bufferSchema.map(_.dataType).forall(UnsafeRow.isFixedLength)
+
+    val buffer = if (useUnsafeBuffer) {
       val unsafeProjection =
         UnsafeProjection.create(bufferSchema.map(_.dataType))
       unsafeProjection.apply(genericMutableBuffer)
@@ -70,7 +72,6 @@ class SortBasedAggregationIterator(
     buffer
   }
 
-
   ///////////////////////////////////////////////////////////////////////////
   // Mutable states for sort based aggregation.
   ///////////////////////////////////////////////////////////////////////////
@@ -96,8 +97,6 @@ class SortBasedAggregationIterator(
     // Now, we will start to find all rows belonging to this group.
     // We create a variable to track if we see the next group.
     var findNextPartition = false
-    println("sort key first in group " + toSafeKey(nextGroupingKey) + " value " + toSafeValue(firstRowInNextGroup) + " buffer " + toSafeBuffer(sortBasedAggregationBuffer))
-
     // firstRowInNextGroup is the first row of this group. We first process it.
     processRow(sortBasedAggregationBuffer, firstRowInNextGroup)
 
@@ -112,7 +111,6 @@ class SortBasedAggregationIterator(
       // Check if the current row belongs the current input row.
       if (currentGroupingKey == groupingKey) {
         processRow(sortBasedAggregationBuffer, currentRow)
-        println("sort key " + toSafeKey(groupingKey) + " value " + toSafeValue(currentRow) + " buffer " + toSafeBuffer(sortBasedAggregationBuffer))
 
         hasNext = inputKVIterator.next()
       } else {
@@ -143,7 +141,6 @@ class SortBasedAggregationIterator(
       processCurrentSortedGroup()
       // Generate output row for the current group.
       val outputRow = generateOutput(currentGroupingKey, sortBasedAggregationBuffer)
-      println("sort result " + toSafeResult(outputRow))
       // Initialize buffer values for the next group.
       initializeBuffer(sortBasedAggregationBuffer)
 
@@ -154,18 +151,12 @@ class SortBasedAggregationIterator(
     }
   }
 
-  val toSafeKey = FromUnsafeProjection(groupingKeyAttributes.map(_.dataType).toArray)
-  val toSafeValue = FromUnsafeProjection(valueAttributes.map(_.dataType).toArray)
-  val toSafeBuffer = FromUnsafeProjection(allAggregateFunctions.flatMap(_.bufferAttributes).map(_.dataType).toArray)
-
   protected def initialize(): Unit = {
     if (inputKVIterator.next()) {
       initializeBuffer(sortBasedAggregationBuffer)
-      println("first " + toSafeKey(inputKVIterator.getKey()) + " value " + toSafeValue(inputKVIterator.getValue()) + " buffer " + toSafeBuffer(sortBasedAggregationBuffer))
 
       nextGroupingKey = inputKVIterator.getKey().copy()
       firstRowInNextGroup = inputKVIterator.getValue().copy()
-      println("first " + toSafeKey(nextGroupingKey) + " value " + toSafeValue(firstRowInNextGroup) + " buffer " + toSafeBuffer(sortBasedAggregationBuffer))
 
       sortedInputHasNewGroup = true
     } else {
@@ -183,6 +174,7 @@ class SortBasedAggregationIterator(
 }
 
 object SortBasedAggregationIterator {
+  // scalastyle:off
   def createFromInputIterator(
       groupingExprs: Seq[NamedExpression],
       nonCompleteAggregateExpressions: Seq[AggregateExpression2],
@@ -196,7 +188,7 @@ object SortBasedAggregationIterator {
       inputAttributes: Seq[Attribute],
       inputIter: Iterator[InternalRow],
       outputsUnsafeRows: Boolean): SortBasedAggregationIterator = {
-    val kvIterator = if (outputsUnsafeRows) {
+    val kvIterator = if (UnsafeProjection.canSupport(groupingExprs)) {
       AggregationIterator.unsafeKVIterator(
         groupingExprs,
         inputAttributes,
@@ -244,4 +236,5 @@ object SortBasedAggregationIterator {
       newMutableProjection,
       outputsUnsafeRows)
   }
+  // scalastyle:on
 }
Original file line number	Diff line number	Diff line change
`@@ -137,6 +137,8 @@ case class HashPartitioning(expressions: Seq[Expression], numPartitions: Int)`
`137`	`137`	`}`
`138`	`138`
`139`	`139`	`override def keyExpressions: Seq[Expression] = expressions`
	`140`	`+`
	`141`	`+ override def toString: String = s"${super.toString} numPartitions=$numPartitions"`
`140`	`142`	`}`
`141`	`143`
`142`	`144`	`/**`