15 changes: 12 additions & 3 deletions core/src/main/scala/org/apache/spark/api/java/JavaPairRDD.scala
@@ -122,7 +122,7 @@ class JavaPairRDD[K, V](val rdd: RDD[(K, V)])
   */
  def sample(withReplacement: Boolean, fraction: Double): JavaPairRDD[K, V] =
    sample(withReplacement, fraction, Utils.random.nextLong)

  /**
   * Return a sampled subset of this RDD.
   */
@@ -195,8 +195,17 @@ class JavaPairRDD[K, V](val rdd: RDD[(K, V)])
   * the merging locally on each mapper before sending results to a reducer, similarly to a
   * "combiner" in MapReduce.
   */
+ @deprecated("Use reduceByKey(func: JFunction2[V, V, V], partitioner: Partitioner)", "1.0.0")
  def reduceByKey(partitioner: Partitioner, func: JFunction2[V, V, V]): JavaPairRDD[K, V] =
-   fromRDD(rdd.reduceByKey(partitioner, func))
+   reduceByKey(func, partitioner)
+
+ /**
+  * Merge the values for each key using an associative reduce function. This will also perform
+  * the merging locally on each mapper before sending results to a reducer, similarly to a
+  * "combiner" in MapReduce.
+  */
+ def reduceByKey(func: JFunction2[V, V, V], partitioner: Partitioner): JavaPairRDD[K, V] =
+   fromRDD(rdd.reduceByKey(func, partitioner))

  /**
   * Merge the values for each key using an associative reduce function, but return the results
@@ -374,7 +383,7 @@ class JavaPairRDD[K, V](val rdd: RDD[(K, V)])
   * parallelism level.
   */
  def reduceByKey(func: JFunction2[V, V, V]): JavaPairRDD[K, V] = {
-   fromRDD(reduceByKey(defaultPartitioner(rdd), func))
+   fromRDD(reduceByKey(func, defaultPartitioner(rdd)))
  }

  /**
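For readers skimming the Java wrapper: the old (partitioner, func) overload is kept but deprecated, and simply forwards to the new (func, partitioner) overload. A minimal, self-contained Scala sketch of that deprecate-and-forward pattern (the toy Pairs class and its names are assumptions for illustration, not Spark code):

// Toy sketch of deprecate-and-forward: the old argument order stays
// source-compatible while steering callers toward the new order.
class Pairs[K, V](val data: Seq[(K, V)]) {
  @deprecated("Use reduceByKey(func, numBuckets)", "1.0.0")
  def reduceByKey(numBuckets: Int, func: (V, V) => V): Pairs[K, V] =
    reduceByKey(func, numBuckets) // old order forwards to the new one

  def reduceByKey(func: (V, V) => V, numBuckets: Int): Pairs[K, V] =
    // numBuckets is ignored here; partitioning is beside the point of the sketch
    new Pairs(data.groupBy(_._1).map { case (k, vs) =>
      (k, vs.map(_._2).reduceLeft(func))
    }.toSeq)
}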
14 changes: 12 additions & 2 deletions core/src/main/scala/org/apache/spark/rdd/PairRDDFunctions.scala
@@ -160,7 +160,17 @@ class PairRDDFunctions[K, V](self: RDD[(K, V)])
   * the merging locally on each mapper before sending results to a reducer, similarly to a
   * "combiner" in MapReduce.
   */
+ @deprecated("Use reduceByKey(func: (V, V) => V, partitioner: Partitioner)", "1.0.0")
  def reduceByKey(partitioner: Partitioner, func: (V, V) => V): RDD[(K, V)] = {
+   reduceByKey(func, partitioner)
+ }
+
+ /**
+  * Merge the values for each key using an associative reduce function. This will also perform
+  * the merging locally on each mapper before sending results to a reducer, similarly to a
+  * "combiner" in MapReduce.
+  */
+ def reduceByKey(func: (V, V) => V, partitioner: Partitioner): RDD[(K, V)] = {
    combineByKey[V]((v: V) => v, func, func, partitioner)
  }

@@ -258,7 +268,7 @@ class PairRDDFunctions[K, V](self: RDD[(K, V)])
* "combiner" in MapReduce. Output will be hash-partitioned with numPartitions partitions.
*/
def reduceByKey(func: (V, V) => V, numPartitions: Int): RDD[(K, V)] = {
reduceByKey(new HashPartitioner(numPartitions), func)
reduceByKey(func, new HashPartitioner(numPartitions))
}

/**
@@ -359,7 +369,7 @@ class PairRDDFunctions[K, V](self: RDD[(K, V)])
   * parallelism level.
   */
  def reduceByKey(func: (V, V) => V): RDD[(K, V)] = {
-   reduceByKey(defaultPartitioner(self), func)
+   reduceByKey(func, defaultPartitioner(self))
  }

  /**
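To make the effect of the reordering concrete, here is a sketch of call sites under the proposed ordering (assumes a live SparkContext `sc`; written against this PR's proposed API, which was still under discussion, not a final interface):

// Sketch: the reduce function now comes first, the partitioning argument second.
import org.apache.spark.HashPartitioner

val pairs = sc.parallelize(Seq(("a", 1), ("b", 2), ("a", 3)))

// New order, with an explicit Partitioner or a partition count:
val byPartitioner = pairs.reduceByKey((x: Int, y: Int) => x + y, new HashPartitioner(4))
val byNumPartitions = pairs.reduceByKey((x: Int, y: Int) => x + y, 4)

// The old order still compiles, but now emits a deprecation warning:
// pairs.reduceByKey(new HashPartitioner(4), (x: Int, y: Int) => x + y)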
6 changes: 3 additions & 3 deletions core/src/main/scala/org/apache/spark/rdd/RDD.scala
@@ -285,7 +285,7 @@ abstract class RDD[T: ClassTag](
   * Return a new RDD containing the distinct elements in this RDD.
   */
  def distinct(numPartitions: Int)(implicit ord: Ordering[T] = null): RDD[T] =
-   map(x => (x, null)).reduceByKey((x, y) => x, numPartitions).map(_._1)
+   map(x => (x, null.asInstanceOf[T])).reduceByKey((x: T, y: T) => x, numPartitions).map(_._1)

  /**
   * Return a new RDD containing the distinct elements in this RDD.
@@ -341,8 +341,8 @@ abstract class RDD[T: ClassTag](
  /**
   * Return a sampled subset of this RDD.
   */
- def sample(withReplacement: Boolean,
-     fraction: Double,
+ def sample(withReplacement: Boolean,
+     fraction: Double,
      seed: Long = Utils.random.nextLong): RDD[T] = {
    require(fraction >= 0.0, "Invalid fraction value: " + fraction)
    if (withReplacement) {
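Two of the changes in this file look gratuitous but follow from Scala's overload resolution: once reduceByKey has several overloads whose first parameter is a function, the compiler can no longer infer the parameter types of a bare lambda such as (x, y) => x, and annotating the lambda as (x: T, y: T) => x in distinct in turn forces the null placeholder to be cast to T so the pair typechecks. (The sample hunk, by contrast, appears to be a whitespace-only cleanup, which is why its removed and added lines look identical.) A toy reproduction of the inference failure, independent of Spark and reflecting Scala 2-era compiler behavior:

// Toy reproduction (not Spark): with two overloads taking a function first,
// Scala 2 cannot infer the lambda's parameter types at the call site.
object OverloadDemo {
  def reduce(f: (Int, Int) => Int, numPartitions: Int): Int = f(numPartitions, 1)
  def reduce(f: (Int, Int) => Int, name: String): Int = f(name.length, 1)

  // reduce((x, y) => x, 4)                  // error: missing parameter type
  val ok = reduce((x: Int, y: Int) => x, 4)  // fine once the types are explicit
}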
@@ -267,7 +267,7 @@ class ReceiverTracker(ssc: StreamingContext) extends Logging {
    // Run the dummy Spark job to ensure that all slaves have registered.
    // This avoids all the receivers to be scheduled on the same node.
    if (!ssc.sparkContext.isLocal) {
-     ssc.sparkContext.makeRDD(1 to 50, 50).map(x => (x, 1)).reduceByKey(_ + _, 20).collect()
+     ssc.sparkContext.makeRDD(1 to 50, 50).map(x => (x, 1)).reduceByKey((x: Int, y: Int) => x + y, 20).collect()

Contributor:
This line is over 100 chars wide.

Contributor (author):
@rxin Will fix this as soon as a decision is made on whether we want to do this or not.

    }

    // Distribute the receivers and start them
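On the reviewer's 100-character note: one way the dummy job could be wrapped without changing behavior (a sketch of a possible fix, not what the PR ultimately did):

// Sketch: same dummy job, wrapped to stay under the 100-char line limit.
if (!ssc.sparkContext.isLocal) {
  ssc.sparkContext.makeRDD(1 to 50, 50)
    .map(x => (x, 1))
    .reduceByKey((x: Int, y: Int) => x + y, 20)
    .collect()
}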
@@ -25,7 +25,7 @@ import scala.collection.JavaConversions.mapAsScalaMap
private[streaming]
object RawTextHelper {

- /**
+ /**
   * Splits lines and counts the words.
   */
  def splitAndCountPartitions(iter: Iterator[String]): Iterator[(String, Long)] = {
@@ -103,7 +103,7 @@ object RawTextHelper {
    for(i <- 0 to 1) {
      sc.parallelize(1 to 200000, 1000)
        .map(_ % 1331).map(_.toString)
-       .mapPartitions(splitAndCountPartitions).reduceByKey(_ + _, 10)
+       .mapPartitions(splitAndCountPartitions).reduceByKey((x: Long, y: Long) => x + y, 10)
        .count()
    }
  }
@@ -114,4 +114,3 @@ object RawTextHelper {

  def max(v1: Long, v2: Long) = math.max(v1, v2)
}
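The benchmark above leans on splitAndCountPartitions, whose body is collapsed in this diff view. A plausible, purely illustrative implementation matching the visible signature (an assumption for readability, not the actual Spark source):

// Hypothetical body for the collapsed helper: per-partition word counting.
def splitAndCountPartitions(iter: Iterator[String]): Iterator[(String, Long)] = {
  val counts = scala.collection.mutable.HashMap.empty[String, Long]
  for (line <- iter; word <- line.split(" ") if word.nonEmpty) {
    counts(word) = counts.getOrElse(word, 0L) + 1L
  }
  counts.iterator
}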