
Commit a00261d

Merge remote-tracking branch 'upstream/master' into nested-union

2 parents: 190b326 + f88c641
124 files changed: +2280 / -1244 lines


.github/PULL_REQUEST_TEMPLATE

Lines changed: 12 additions & 0 deletions
@@ -0,0 +1,12 @@
+## What changes were proposed in this pull request?
+
+(Please fill in changes proposed in this fix)
+
+
+## How was the this patch tested?
+
+(Please explain how this patch was tested. E.g. unit tests, integration tests, manual tests)
+
+
+(If this patch involves UI changes, please attach a screenshot; otherwise, remove this)
+

R/pkg/R/pairRDD.R

Lines changed: 5 additions & 5 deletions
@@ -305,11 +305,11 @@ setMethod("groupByKey",
 #' Merge values by key
 #'
 #' This function operates on RDDs where every element is of the form list(K, V) or c(K, V).
-#' and merges the values for each key using an associative reduce function.
+#' and merges the values for each key using an associative and commutative reduce function.
 #'
 #' @param x The RDD to reduce by key. Should be an RDD where each element is
 #' list(K, V) or c(K, V).
-#' @param combineFunc The associative reduce function to use.
+#' @param combineFunc The associative and commutative reduce function to use.
 #' @param numPartitions Number of partitions to create.
 #' @return An RDD where each element is list(K, V') where V' is the merged
 #' value
@@ -347,12 +347,12 @@ setMethod("reduceByKey",
 #' Merge values by key locally
 #'
 #' This function operates on RDDs where every element is of the form list(K, V) or c(K, V).
-#' and merges the values for each key using an associative reduce function, but return the
-#' results immediately to the driver as an R list.
+#' and merges the values for each key using an associative and commutative reduce function, but
+#' return the results immediately to the driver as an R list.
 #'
 #' @param x The RDD to reduce by key. Should be an RDD where each element is
 #' list(K, V) or c(K, V).
-#' @param combineFunc The associative reduce function to use.
+#' @param combineFunc The associative and commutative reduce function to use.
 #' @return A list of elements of type list(K, V') where V' is the merged value for each key
 #' @seealso reduceByKey
 #' @examples

build/mvn

Lines changed: 1 addition & 1 deletion
@@ -69,7 +69,7 @@ install_app() {
 
 # Install maven under the build/ folder
 install_mvn() {
-  local MVN_VERSION="3.3.3"
+  local MVN_VERSION="3.3.9"
 
   install_app \
     "http://archive.apache.org/dist/maven/maven-3/${MVN_VERSION}/binaries" \

core/src/main/scala/org/apache/spark/Accumulator.scala

Lines changed: 3 additions & 3 deletions
@@ -29,9 +29,9 @@ import org.apache.spark.storage.{BlockId, BlockStatus}
 /**
  * A simpler value of [[Accumulable]] where the result type being accumulated is the same
  * as the types of elements being merged, i.e. variables that are only "added" to through an
- * associative operation and can therefore be efficiently supported in parallel. They can be used
- * to implement counters (as in MapReduce) or sums. Spark natively supports accumulators of numeric
- * value types, and programmers can add support for new types.
+ * associative and commutative operation and can therefore be efficiently supported in parallel.
+ * They can be used to implement counters (as in MapReduce) or sums. Spark natively supports
+ * accumulators of numeric value types, and programmers can add support for new types.
  *
  * An accumulator is created from an initial value `v` by calling [[SparkContext#accumulator]].
  * Tasks running on the cluster can then add to it using the [[Accumulable#+=]] operator.
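
The scaladoc above describes how an accumulator is created on the driver and added to from tasks. A minimal sketch of that usage, assuming `sc` is an existing SparkContext and using the pre-2.0 `SparkContext#accumulator` API:

    // Sketch only: `sc` is an assumed SparkContext. Int addition is
    // associative and commutative, as the revised doc requires.
    val failures = sc.accumulator(0, "failures")
    sc.parallelize(Seq("ok", "fail", "ok", "fail")).foreach { record =>
      if (record == "fail") failures += 1   // tasks only "add" to the accumulator
    }
    println(failures.value)                 // read back on the driver: 2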

core/src/main/scala/org/apache/spark/api/java/JavaPairRDD.scala

Lines changed: 16 additions & 16 deletions
@@ -278,17 +278,17 @@ class JavaPairRDD[K, V](val rdd: RDD[(K, V)])
     combineByKey(createCombiner, mergeValue, mergeCombiners, new HashPartitioner(numPartitions))
 
   /**
-   * Merge the values for each key using an associative reduce function. This will also perform
-   * the merging locally on each mapper before sending results to a reducer, similarly to a
-   * "combiner" in MapReduce.
+   * Merge the values for each key using an associative and commutative reduce function. This will
+   * also perform the merging locally on each mapper before sending results to a reducer, similarly
+   * to a "combiner" in MapReduce.
    */
   def reduceByKey(partitioner: Partitioner, func: JFunction2[V, V, V]): JavaPairRDD[K, V] =
     fromRDD(rdd.reduceByKey(partitioner, func))
 
   /**
-   * Merge the values for each key using an associative reduce function, but return the results
-   * immediately to the master as a Map. This will also perform the merging locally on each mapper
-   * before sending results to a reducer, similarly to a "combiner" in MapReduce.
+   * Merge the values for each key using an associative and commutative reduce function, but return
+   * the result immediately to the master as a Map. This will also perform the merging locally on
+   * each mapper before sending results to a reducer, similarly to a "combiner" in MapReduce.
    */
   def reduceByKeyLocally(func: JFunction2[V, V, V]): java.util.Map[K, V] =
     mapAsSerializableJavaMap(rdd.reduceByKeyLocally(func))
@@ -381,9 +381,9 @@ class JavaPairRDD[K, V](val rdd: RDD[(K, V)])
     fromRDD(rdd.foldByKey(zeroValue)(func))
 
   /**
-   * Merge the values for each key using an associative reduce function. This will also perform
-   * the merging locally on each mapper before sending results to a reducer, similarly to a
-   * "combiner" in MapReduce. Output will be hash-partitioned with numPartitions partitions.
+   * Merge the values for each key using an associative and commutative reduce function. This will
+   * also perform the merging locally on each mapper before sending results to a reducer, similarly
+   * to a "combiner" in MapReduce. Output will be hash-partitioned with numPartitions partitions.
    */
   def reduceByKey(func: JFunction2[V, V, V], numPartitions: Int): JavaPairRDD[K, V] =
     fromRDD(rdd.reduceByKey(func, numPartitions))
@@ -461,10 +461,10 @@ class JavaPairRDD[K, V](val rdd: RDD[(K, V)])
     fromRDD(rdd.partitionBy(partitioner))
 
   /**
-   * Merge the values for each key using an associative reduce function. This will also perform
-   * the merging locally on each mapper before sending results to a reducer, similarly to a
-   * "combiner" in MapReduce.
-   */
+   * Return an RDD containing all pairs of elements with matching keys in `this` and `other`. Each
+   * pair of elements will be returned as a (k, (v1, v2)) tuple, where (k, v1) is in `this` and
+   * (k, v2) is in `other`. Uses the given Partitioner to partition the output RDD.
+   */
   def join[W](other: JavaPairRDD[K, W], partitioner: Partitioner): JavaPairRDD[K, (V, W)] =
     fromRDD(rdd.join(other, partitioner))
 
@@ -520,9 +520,9 @@ class JavaPairRDD[K, V](val rdd: RDD[(K, V)])
   }
 
   /**
-   * Merge the values for each key using an associative reduce function. This will also perform
-   * the merging locally on each mapper before sending results to a reducer, similarly to a
-   * "combiner" in MapReduce. Output will be hash-partitioned with the existing partitioner/
+   * Merge the values for each key using an associative and commutative reduce function. This will
+   * also perform the merging locally on each mapper before sending results to a reducer, similarly
+   * to a "combiner" in MapReduce. Output will be hash-partitioned with the existing partitioner/
    * parallelism level.
    */
   def reduceByKey(func: JFunction2[V, V, V]): JavaPairRDD[K, V] = {
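
The corrected `join` doc above describes the (k, (v1, v2)) output shape and the use of the supplied Partitioner. A minimal sketch against the underlying Scala pair-RDD API that `JavaPairRDD` wraps, assuming `sc` is an existing SparkContext:

    import org.apache.spark.HashPartitioner

    // Sketch only: `sc` is an assumed SparkContext.
    val left  = sc.parallelize(Seq(("a", 1), ("b", 2)))
    val right = sc.parallelize(Seq(("a", "x"), ("a", "y")))

    // Inner join: each key match yields one (k, (v1, v2)) pair, and the
    // output RDD is partitioned by the given Partitioner.
    val joined = left.join(right, new HashPartitioner(2))
    joined.collect().foreach(println)   // (a,(1,x)) and (a,(1,y)); "b" has no match and is dropped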

core/src/main/scala/org/apache/spark/api/java/JavaRDDLike.scala

Lines changed: 1 addition & 1 deletion
@@ -373,7 +373,7 @@ trait JavaRDDLike[T, This <: JavaRDDLike[T, This]] extends Serializable {
 
   /**
    * Aggregate the elements of each partition, and then the results for all the partitions, using a
-   * given associative and commutative function and a neutral "zero value". The function
+   * given associative function and a neutral "zero value". The function
    * op(t1, t2) is allowed to modify t1 and return it as its result value to avoid object
    * allocation; however, it should not modify t2.
    *

core/src/main/scala/org/apache/spark/deploy/FaultToleranceTest.scala

Lines changed: 1 addition & 1 deletion
@@ -252,7 +252,7 @@ private object FaultToleranceTest extends App with Logging {
     val f = Future {
       try {
         val res = sc.parallelize(0 until 10).collect()
-        assertTrue(res.toList == (0 until 10))
+        assertTrue(res.toList == (0 until 10).toList)
         true
       } catch {
         case e: Exception =>

core/src/main/scala/org/apache/spark/executor/TaskMetrics.scala

Lines changed: 29 additions & 26 deletions
@@ -364,6 +364,27 @@ class TaskMetrics private[spark] (initialAccums: Seq[Accumulator[_]]) extends Se
 
 }
 
+/**
+ * Internal subclass of [[TaskMetrics]] which is used only for posting events to listeners.
+ * Its purpose is to obviate the need for the driver to reconstruct the original accumulators,
+ * which might have been garbage-collected. See SPARK-13407 for more details.
+ *
+ * Instances of this class should be considered read-only and users should not call `inc*()` or
+ * `set*()` methods. While we could override the setter methods to throw
+ * UnsupportedOperationException, we choose not to do so because the overrides would quickly become
+ * out-of-date when new metrics are added.
+ */
+private[spark] class ListenerTaskMetrics(
+    initialAccums: Seq[Accumulator[_]],
+    accumUpdates: Seq[AccumulableInfo]) extends TaskMetrics(initialAccums) {
+
+  override def accumulatorUpdates(): Seq[AccumulableInfo] = accumUpdates
+
+  override private[spark] def registerAccumulator(a: Accumulable[_, _]): Unit = {
+    throw new UnsupportedOperationException("This TaskMetrics is read-only")
+  }
+}
+
 private[spark] object TaskMetrics extends Logging {
 
   def empty: TaskMetrics = new TaskMetrics
@@ -397,33 +418,15 @@ private[spark] object TaskMetrics extends Logging {
     // Initial accumulators are passed into the TaskMetrics constructor first because these
     // are required to be uniquely named. The rest of the accumulators from this task are
     // registered later because they need not satisfy this requirement.
-    val (initialAccumInfos, otherAccumInfos) = accumUpdates
-      .filter { info => info.update.isDefined }
-      .partition { info => info.name.exists(_.startsWith(InternalAccumulator.METRICS_PREFIX)) }
-    val initialAccums = initialAccumInfos.map { info =>
-      val accum = InternalAccumulator.create(info.name.get)
-      accum.setValueAny(info.update.get)
-      accum
-    }
-    // We don't know the types of the rest of the accumulators, so we try to find the same ones
-    // that were previously registered here on the driver and make copies of them. It is important
-    // that we copy the accumulators here since they are used across many tasks and we want to
-    // maintain a snapshot of their local task values when we post them to listeners downstream.
-    val otherAccums = otherAccumInfos.flatMap { info =>
-      val id = info.id
-      val acc = Accumulators.get(id).map { a =>
-        val newAcc = a.copy()
-        newAcc.setValueAny(info.update.get)
-        newAcc
+    val definedAccumUpdates = accumUpdates.filter { info => info.update.isDefined }
+    val initialAccums = definedAccumUpdates
+      .filter { info => info.name.exists(_.startsWith(InternalAccumulator.METRICS_PREFIX)) }
+      .map { info =>
+        val accum = InternalAccumulator.create(info.name.get)
+        accum.setValueAny(info.update.get)
+        accum
       }
-      if (acc.isEmpty) {
-        logWarning(s"encountered unregistered accumulator $id when reconstructing task metrics.")
-      }
-      acc
-    }
-    val metrics = new TaskMetrics(initialAccums)
-    otherAccums.foreach(metrics.registerAccumulator)
-    metrics
+    new ListenerTaskMetrics(initialAccums, definedAccumUpdates)
   }
 
 }
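
`ListenerTaskMetrics` is internal to Spark, but the shape of the change is a general pattern: a read-only view that reports a fixed snapshot and overrides a single registration hook rather than every setter. A standalone sketch of that pattern; the class and member names below are hypothetical and only mirror the idea:

    // Hypothetical sketch of the read-only-subclass pattern; not Spark's actual types.
    class Metrics(initial: Seq[Long]) {
      private var extras: Seq[Long] = Seq.empty
      def register(v: Long): Unit = { extras = extras :+ v }   // normal, mutable path
      def updates(): Seq[Long] = initial ++ extras
    }

    // Listener-facing view: returns a fixed snapshot and rejects further registration,
    // so readers never depend on state that may have been garbage-collected.
    class ListenerMetrics(initial: Seq[Long], snapshot: Seq[Long]) extends Metrics(initial) {
      override def updates(): Seq[Long] = snapshot
      override def register(v: Long): Unit =
        throw new UnsupportedOperationException("read-only")
    }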

core/src/main/scala/org/apache/spark/rdd/PairRDDFunctions.scala

Lines changed: 12 additions & 12 deletions
@@ -300,37 +300,37 @@ class PairRDDFunctions[K, V](self: RDD[(K, V)])
   }
 
   /**
-   * Merge the values for each key using an associative reduce function. This will also perform
-   * the merging locally on each mapper before sending results to a reducer, similarly to a
-   * "combiner" in MapReduce.
+   * Merge the values for each key using an associative and commutative reduce function. This will
+   * also perform the merging locally on each mapper before sending results to a reducer, similarly
+   * to a "combiner" in MapReduce.
    */
   def reduceByKey(partitioner: Partitioner, func: (V, V) => V): RDD[(K, V)] = self.withScope {
     combineByKeyWithClassTag[V]((v: V) => v, func, func, partitioner)
   }
 
   /**
-   * Merge the values for each key using an associative reduce function. This will also perform
-   * the merging locally on each mapper before sending results to a reducer, similarly to a
-   * "combiner" in MapReduce. Output will be hash-partitioned with numPartitions partitions.
+   * Merge the values for each key using an associative and commutative reduce function. This will
+   * also perform the merging locally on each mapper before sending results to a reducer, similarly
+   * to a "combiner" in MapReduce. Output will be hash-partitioned with numPartitions partitions.
    */
   def reduceByKey(func: (V, V) => V, numPartitions: Int): RDD[(K, V)] = self.withScope {
     reduceByKey(new HashPartitioner(numPartitions), func)
   }
 
   /**
-   * Merge the values for each key using an associative reduce function. This will also perform
-   * the merging locally on each mapper before sending results to a reducer, similarly to a
-   * "combiner" in MapReduce. Output will be hash-partitioned with the existing partitioner/
+   * Merge the values for each key using an associative and commutative reduce function. This will
+   * also perform the merging locally on each mapper before sending results to a reducer, similarly
+   * to a "combiner" in MapReduce. Output will be hash-partitioned with the existing partitioner/
    * parallelism level.
    */
   def reduceByKey(func: (V, V) => V): RDD[(K, V)] = self.withScope {
     reduceByKey(defaultPartitioner(self), func)
   }
 
   /**
-   * Merge the values for each key using an associative reduce function, but return the results
-   * immediately to the master as a Map. This will also perform the merging locally on each mapper
-   * before sending results to a reducer, similarly to a "combiner" in MapReduce.
+   * Merge the values for each key using an associative and commutative reduce function, but return
+   * the results immediately to the master as a Map. This will also perform the merging locally on
+   * each mapper before sending results to a reducer, similarly to a "combiner" in MapReduce.
    */
   def reduceByKeyLocally(func: (V, V) => V): Map[K, V] = self.withScope {
     val cleanedF = self.sparkContext.clean(func)
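
The reworded docs make explicit that the reduce function must be both associative and commutative, because values are combined map-side and merged in no guaranteed order. A minimal sketch, assuming `sc` is an existing SparkContext:

    // Sketch only: `sc` is an assumed SparkContext.
    val pairs = sc.parallelize(Seq(("a", 1), ("b", 2), ("a", 3), ("b", 4)))

    // Addition is associative and commutative, so map-side combining is safe and
    // the result is deterministic regardless of how the pairs are partitioned.
    val summed = pairs.reduceByKey(_ + _)           // RDD[(String, Int)]: (a,4), (b,6)
    val local  = pairs.reduceByKeyLocally(_ + _)    // Map[String, Int] returned to the driver

    // By contrast, a function such as (_ - _) is neither associative nor commutative,
    // so using it here could give partitioning-dependent results.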

core/src/main/scala/org/apache/spark/rdd/RDD.scala

Lines changed: 1 addition & 1 deletion
@@ -973,7 +973,7 @@ abstract class RDD[T: ClassTag](
 
   /**
    * Aggregate the elements of each partition, and then the results for all the partitions, using a
-   * given associative and commutative function and a neutral "zero value". The function
+   * given associative function and a neutral "zero value". The function
    * op(t1, t2) is allowed to modify t1 and return it as its result value to avoid object
    * allocation; however, it should not modify t2.
    *
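
The updated `fold` doc asks for an associative function and a neutral "zero value", since the zero is folded in once per partition and again when partition results are merged. A minimal sketch, assuming `sc` is an existing SparkContext and using an operation that is safely both associative and commutative:

    // Sketch only: `sc` is an assumed SparkContext.
    val nums = sc.parallelize(1 to 100, 4)

    // 0 is neutral for +, so the result is 5050 no matter how the
    // 100 numbers are split across the 4 partitions.
    val total = nums.fold(0)(_ + _)

    // The zero value participates in every per-partition fold and in the final
    // merge, so a non-neutral zero (e.g. fold(1)(_ + _)) would be counted several times.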
