Commit ff87a60

Author: Marcelo Vanzin (committed)
Merge branch 'master' into SPARK-6890
2 parents: 31d3ce8 + 51b306b

34 files changed: +840, -418 lines changed


R/pkg/R/RDD.R

Lines changed: 4 additions & 12 deletions
@@ -85,7 +85,7 @@ setMethod("initialize", "PipelinedRDD", function(.Object, prev, func, jrdd_val)

  if (!inherits(prev, "PipelinedRDD") || !isPipelinable(prev)) {
    # This transformation is the first in its stage:
-   .Object@func <- func
+   .Object@func <- cleanClosure(func)
    .Object@prev_jrdd <- getJRDD(prev)
    .Object@env$prev_serializedMode <- prev@env$serializedMode
    # NOTE: We use prev_serializedMode to track the serialization mode of prev_JRDD
@@ -94,7 +94,7 @@ setMethod("initialize", "PipelinedRDD", function(.Object, prev, func, jrdd_val)
    pipelinedFunc <- function(split, iterator) {
      func(split, prev@func(split, iterator))
    }
-   .Object@func <- pipelinedFunc
+   .Object@func <- cleanClosure(pipelinedFunc)
    .Object@prev_jrdd <- prev@prev_jrdd # maintain the pipeline
    # Get the serialization mode of the parent RDD
    .Object@env$prev_serializedMode <- prev@env$prev_serializedMode
@@ -144,17 +144,13 @@ setMethod("getJRDD", signature(rdd = "PipelinedRDD"),
            return(rdd@env$jrdd_val)
          }

-         computeFunc <- function(split, part) {
-           rdd@func(split, part)
-         }
-
          packageNamesArr <- serialize(.sparkREnv[[".packages"]],
                                       connection = NULL)

          broadcastArr <- lapply(ls(.broadcastNames),
                                 function(name) { get(name, .broadcastNames) })

-         serializedFuncArr <- serialize(computeFunc, connection = NULL)
+         serializedFuncArr <- serialize(rdd@func, connection = NULL)

          prev_jrdd <- rdd@prev_jrdd

@@ -551,11 +547,7 @@ setMethod("mapPartitions",
setMethod("lapplyPartitionsWithIndex",
          signature(X = "RDD", FUN = "function"),
          function(X, FUN) {
-           FUN <- cleanClosure(FUN)
-           closureCapturingFunc <- function(split, part) {
-             FUN(split, part)
-           }
-           PipelinedRDD(X, closureCapturingFunc)
+           PipelinedRDD(X, FUN)
          })

#' @rdname lapplyPartitionsWithIndex

R/pkg/R/pairRDD.R

Lines changed: 0 additions & 4 deletions
@@ -694,10 +694,6 @@ setMethod("cogroup",
            for (i in 1:rddsLen) {
              rdds[[i]] <- lapply(rdds[[i]],
                                  function(x) { list(x[[1]], list(i, x[[2]])) })
-             # TODO(hao): As issue [SparkR-142] mentions, the right value of i
-             # will not be captured into UDF if getJRDD is not invoked.
-             # It should be resolved together with that issue.
-             getJRDD(rdds[[i]]) # Capture the closure.
            }
            union.rdd <- Reduce(unionRDD, rdds)
            group.func <- function(vlist) {

core/src/main/scala/org/apache/spark/rdd/DoubleRDDFunctions.scala

Lines changed: 1 addition & 1 deletion
@@ -31,7 +31,7 @@ import org.apache.spark.util.StatCounter
class DoubleRDDFunctions(self: RDD[Double]) extends Logging with Serializable {
  /** Add up the elements in this RDD. */
  def sum(): Double = {
-   self.reduce(_ + _)
+   self.fold(0.0)(_ + _)
  }

  /**
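
Note on the change above: reduce has no identity element, so it fails on an empty RDD, while fold(0.0) simply returns the zero value. A minimal sketch, assuming a running SparkContext named sc:

  // Sketch only: the behavioral difference motivating sum() switching to fold.
  val empty = sc.parallelize(Seq.empty[Double])
  // empty.reduce(_ + _)  // throws UnsupportedOperationException: empty collection
  empty.fold(0.0)(_ + _)  // returns 0.0, which is what sum() should report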

core/src/test/scala/org/apache/spark/rdd/DoubleRDDSuite.scala

Lines changed: 6 additions & 0 deletions
@@ -22,6 +22,12 @@ import org.scalatest.FunSuite
import org.apache.spark._

class DoubleRDDSuite extends FunSuite with SharedSparkContext {
+  test("sum") {
+    assert(sc.parallelize(Seq.empty[Double]).sum() === 0.0)
+    assert(sc.parallelize(Seq(1.0)).sum() === 1.0)
+    assert(sc.parallelize(Seq(1.0, 2.0)).sum() === 3.0)
+  }
+
  // Verify tests on the histogram functionality. We test with both evenly
  // and non-evenly spaced buckets as the bucket lookup function changes.
  test("WorksOnEmpty") {

dev/run-tests-jenkins

Lines changed: 4 additions & 0 deletions
@@ -161,6 +161,10 @@ pr_message=""
# Ensure we save off the current HEAD to revert to
current_pr_head="`git rev-parse HEAD`"

+echo "HEAD: `git rev-parse HEAD`"
+echo "GHPRB: $ghprbActualCommit"
+echo "SHA1: $sha1"
+
# Run pull request tests
for t in "${PR_TESTS[@]}"; do
  this_test="${FWDIR}/dev/tests/${t}.sh"

dev/tests/pr_new_dependencies.sh

Lines changed: 4 additions & 4 deletions
@@ -39,12 +39,12 @@ CURR_CP_FILE="my-classpath.txt"
MASTER_CP_FILE="master-classpath.txt"

# First switch over to the master branch
-git checkout master &>/dev/null
+git checkout -f master
# Find and copy all pom.xml files into a *.gate file that we can check
# against through various `git` changes
find -name "pom.xml" -exec cp {} {}.gate \;
# Switch back to the current PR
-git checkout "${current_pr_head}" &>/dev/null
+git checkout -f "${current_pr_head}"

# Check if any *.pom files from the current branch are different from the master
difference_q=""
@@ -71,7 +71,7 @@ else
    sort > ${CURR_CP_FILE}

  # Checkout the master branch to compare against
-  git checkout master &>/dev/null
+  git checkout -f master

  ${MVN_BIN} clean package dependency:build-classpath -DskipTests 2>/dev/null | \
    sed -n -e '/Building Spark Project Assembly/,$p' | \
@@ -84,7 +84,7 @@ else
    rev | \
    sort > ${MASTER_CP_FILE}

-  DIFF_RESULTS="`diff my-classpath.txt master-classpath.txt`"
+  DIFF_RESULTS="`diff ${CURR_CP_FILE} ${MASTER_CP_FILE}`"

  if [ -z "${DIFF_RESULTS}" ]; then
    echo " * This patch does not change any dependencies."

examples/src/main/java/org/apache/spark/examples/ml/JavaDeveloperApiExample.java

Lines changed: 2 additions & 2 deletions
@@ -116,7 +116,7 @@ class MyJavaLogisticRegression
   */
  IntParam maxIter = new IntParam(this, "maxIter", "max number of iterations");

-  int getMaxIter() { return (Integer) get(maxIter); }
+  int getMaxIter() { return (Integer) getOrDefault(maxIter); }

  public MyJavaLogisticRegression() {
    setMaxIter(100);
@@ -211,7 +211,7 @@ public Vector predictRaw(Vector features) {
  public MyJavaLogisticRegressionModel copy() {
    MyJavaLogisticRegressionModel m =
        new MyJavaLogisticRegressionModel(parent_, fittingParamMap_, weights_);
-    Params$.MODULE$.inheritValues(this.paramMap(), this, m);
+    Params$.MODULE$.inheritValues(this.extractParamMap(), this, m);
    return m;
  }
}

examples/src/main/scala/org/apache/spark/examples/ml/DeveloperApiExample.scala

Lines changed: 3 additions & 3 deletions
@@ -99,7 +99,7 @@ private trait MyLogisticRegressionParams extends ClassifierParams {
   * class since the maxIter parameter is only used during training (not in the Model).
   */
  val maxIter: IntParam = new IntParam(this, "maxIter", "max number of iterations")
-  def getMaxIter: Int = get(maxIter)
+  def getMaxIter: Int = getOrDefault(maxIter)
}

/**
@@ -174,11 +174,11 @@ private class MyLogisticRegressionModel(
   * Create a copy of the model.
   * The copy is shallow, except for the embedded paramMap, which gets a deep copy.
   *
-   * This is used for the defaul implementation of [[transform()]].
+   * This is used for the default implementation of [[transform()]].
   */
  override protected def copy(): MyLogisticRegressionModel = {
    val m = new MyLogisticRegressionModel(parent, fittingParamMap, weights)
-    Params.inheritValues(this.paramMap, this, m)
+    Params.inheritValues(extractParamMap(), this, m)
    m
  }
}

mllib/src/main/scala/org/apache/spark/ml/Estimator.scala

Lines changed: 1 addition & 1 deletion
@@ -40,7 +40,7 @@ abstract class Estimator[M <: Model[M]] extends PipelineStage with Params {
   */
  @varargs
  def fit(dataset: DataFrame, paramPairs: ParamPair[_]*): M = {
-    val map = new ParamMap().put(paramPairs: _*)
+    val map = ParamMap(paramPairs: _*)
    fit(dataset, map)
  }
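
Note on the change above: ParamMap's companion object provides a varargs apply, so ParamMap(paramPairs: _*) builds the same map as the older construct-then-put form. A small equivalence sketch:

  // Sketch only: both expressions produce a ParamMap holding the given pairs.
  import org.apache.spark.ml.param.{ParamMap, ParamPair}
  def buildParamMap(pairs: ParamPair[_]*): ParamMap = ParamMap(pairs: _*)
  // equivalent to: new ParamMap().put(pairs: _*)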

mllib/src/main/scala/org/apache/spark/ml/Pipeline.scala

Lines changed: 5 additions & 5 deletions
@@ -84,7 +84,7 @@ class Pipeline extends Estimator[PipelineModel] {
  /** param for pipeline stages */
  val stages: Param[Array[PipelineStage]] = new Param(this, "stages", "stages of the pipeline")
  def setStages(value: Array[PipelineStage]): this.type = { set(stages, value); this }
-  def getStages: Array[PipelineStage] = get(stages)
+  def getStages: Array[PipelineStage] = getOrDefault(stages)

  /**
   * Fits the pipeline to the input dataset with additional parameters. If a stage is an
@@ -101,7 +101,7 @@ class Pipeline extends Estimator[PipelineModel] {
   */
  override def fit(dataset: DataFrame, paramMap: ParamMap): PipelineModel = {
    transformSchema(dataset.schema, paramMap, logging = true)
-    val map = this.paramMap ++ paramMap
+    val map = extractParamMap(paramMap)
    val theStages = map(stages)
    // Search for the last estimator.
    var indexOfLastEstimator = -1
@@ -138,7 +138,7 @@ class Pipeline extends Estimator[PipelineModel] {
  }

  override def transformSchema(schema: StructType, paramMap: ParamMap): StructType = {
-    val map = this.paramMap ++ paramMap
+    val map = extractParamMap(paramMap)
    val theStages = map(stages)
    require(theStages.toSet.size == theStages.size,
      "Cannot have duplicate components in a pipeline.")
@@ -177,14 +177,14 @@ class PipelineModel private[ml] (

  override def transform(dataset: DataFrame, paramMap: ParamMap): DataFrame = {
    // Precedence of ParamMaps: paramMap > this.paramMap > fittingParamMap
-    val map = (fittingParamMap ++ this.paramMap) ++ paramMap
+    val map = fittingParamMap ++ extractParamMap(paramMap)
    transformSchema(dataset.schema, map, logging = true)
    stages.foldLeft(dataset)((cur, transformer) => transformer.transform(cur, map))
  }

  override def transformSchema(schema: StructType, paramMap: ParamMap): StructType = {
    // Precedence of ParamMaps: paramMap > this.paramMap > fittingParamMap
-    val map = (fittingParamMap ++ this.paramMap) ++ paramMap
+    val map = fittingParamMap ++ extractParamMap(paramMap)
    stages.foldLeft(schema)((cur, transformer) => transformer.transformSchema(cur, map))
  }
}
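
Note on the changes above: the recurring rewrite of this.paramMap ++ paramMap to extractParamMap(paramMap), and of get to getOrDefault, follows the Params refactor that separates explicitly set values from declared defaults. getOrDefault(param) returns the set value if present and falls back to the default otherwise, and extractParamMap merges defaults, the instance's own values, and the caller-supplied map, with the caller's map taking precedence. A conceptual sketch of that precedence (not Spark's actual implementation):

  // Conceptual sketch only: later maps override earlier ones, so `extra` wins.
  import org.apache.spark.ml.param.ParamMap
  def mergedParams(defaults: ParamMap, set: ParamMap, extra: ParamMap): ParamMap =
    defaults ++ set ++ extra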
