From a5644ba531a6e0fbf653daa650d80523435b6aa8 Mon Sep 17 00:00:00 2001 From: "wm624@hotmail.com" Date: Thu, 25 Aug 2016 16:03:44 -0700 Subject: [PATCH 1/4] add glmnet R part code --- R/pkg/NAMESPACE | 1 + R/pkg/R/generics.R | 5 ++++ R/pkg/R/mllib.R | 75 ++++++++++++++++++++++++++++++++++++++++++++-- 3 files changed, 79 insertions(+), 2 deletions(-) diff --git a/R/pkg/NAMESPACE b/R/pkg/NAMESPACE index 267a38c21530b..af250d428e2be 100644 --- a/R/pkg/NAMESPACE +++ b/R/pkg/NAMESPACE @@ -44,6 +44,7 @@ exportMethods("glm", "spark.gaussianMixture", "spark.als", "spark.kstest") + "spark.glmnet") # Job group lifecycle management methods export("setJobGroup", diff --git a/R/pkg/R/generics.R b/R/pkg/R/generics.R index 67a999da9bc26..b51f77217a731 100644 --- a/R/pkg/R/generics.R +++ b/R/pkg/R/generics.R @@ -1304,6 +1304,11 @@ setGeneric("year", function(x) { standardGeneric("year") }) #' @export setGeneric("spark.glm", function(data, formula, ...) { standardGeneric("spark.glm") }) +#' @rdname spark.glmnet +#' @export +setGeneric("spark.glmnet", function(data, formula, ...) { standardGeneric("spark.glmnet") }) + + #' @param x,y For \code{glm}: logical values indicating whether the response vector #' and model matrix used in the fitting process should be returned as #' components of the returned value. 
diff --git a/R/pkg/R/mllib.R b/R/pkg/R/mllib.R index 98db367a856ee..35b97faa5b7f7 100644 --- a/R/pkg/R/mllib.R +++ b/R/pkg/R/mllib.R @@ -32,6 +32,13 @@ #' @note GeneralizedLinearRegressionModel since 2.0.0 setClass("GeneralizedLinearRegressionModel", representation(jobj = "jobj")) +#' S4 class that represents a MultinomialLogisticRegressionModel +#' +#' @param jobj a Java object reference to the backing Scala MultinomialLogisticRegressionModelWrapper +#' @export +#' @note GeneralizedLinearRegressionModel since 2.1.0 +setClass("MultinomialLogisticRegressionModel", representation(jobj = "jobj")) + #' S4 class that represents a NaiveBayesModel #' #' @param jobj a Java object reference to the backing Scala NaiveBayesWrapper @@ -102,7 +109,7 @@ setClass("KSTest", representation(jobj = "jobj")) #' @rdname write.ml #' @name write.ml #' @export -#' @seealso \link{spark.glm}, \link{glm}, +#' @seealso \link{spark.glm}, \link{glm}, \link{spark.glmnet}, #' @seealso \link{spark.als}, \link{spark.gaussianMixture}, \link{spark.isoreg}, \link{spark.kmeans}, #' @seealso \link{spark.lda}, \link{spark.mlp}, \link{spark.naiveBayes}, \link{spark.survreg} #' @seealso \link{read.ml} @@ -115,7 +122,7 @@ NULL #' @rdname predict #' @name predict #' @export -#' @seealso \link{spark.glm}, \link{glm}, +#' @seealso \link{spark.glm}, \link{glm}, \link{spark.glmnet}, #' @seealso \link{spark.als}, \link{spark.gaussianMixture}, \link{spark.isoreg}, \link{spark.kmeans}, #' @seealso \link{spark.mlp}, \link{spark.naiveBayes}, \link{spark.survreg} NULL @@ -320,6 +327,54 @@ setMethod("predict", signature(object = "GeneralizedLinearRegressionModel"), predict_internal(object, newData) }) +setMethod("spark.glmnet", signature(data = "SparkDataFrame", formula = "formula"), + function(data, formula, regParam = 0.0, elasticNetParam = 0.0, tol = 1e-6, maxIter = 100, + fitIntercept = TRUE, standardization = TRUE, thresholds = NULL, weightCol = NULL) { + + formula <- paste0(deparse(formula), collapse = "") + if 
(is.null(weightCol)) { + weightCol <- "" + } + + jobj <- callJStatic("org.apache.spark.ml.r.MultinomialLogisticRegressionModelWrapper", + "fit", formula, data@sdf, as.numeric(regParam), as.numeric(elasticNetParam), + tol, as.integer(maxIter), as.logical(fitIntercept), + as.logical(standardization), as.array(thresholds), as.character(weightCol)) + new("MultinomialLogisticRegressionModel", jobj = jobj) + }) + +# Predicted values based on a MultinomialLogisticRegression model + +#' @param object a fitted MultinomialLogisticRegressionModel +#' @param newData SparkDataFrame for testing +#' @return \code{predict} returns a SparkDataFrame containing predicted values +#' @rdname spark.glmnet +#' @aliases predict,MultinomialLogisticRegressionModel,SparkDataFrame-method +#' @export +#' @note predict(MultinomialLogisticRegressionModel) since 2.1.0 +setMethod("predict", signature(object = "MultinomialLogisticRegressionModel"), + function(object, newData) { + predict_internal(object, newData) + }) + +# Get the summary of a MultinomialLogisticRegression model + +#' @return \code{summary} returns the model's coefficients, intercepts and numClasses +#' @rdname spark.glmnet +#' @aliases summary,MultinomialLogisticRegression-method +#' @export +#' @note summary(MultinomialLogisticRegressionModel) since 2.1.0 +setMethod("summary", signature(object = "MultinomialLogisticRegressionModel"), + function(object) { + jobj <- object@jobj + coefficients <- callJMethod(jobj, "coefficients") + intercepts <- callJMethod(jobj, "intercepts") + numClasses <- callJMethod(jobj, "numClasses") + k <- callJMethod(jobj, "numFeatures") + coefficients <- t(matrix(coefficients, ncol = k)) + list(coefficients = coefficients, intercepts = intercepts, numClasses = numClasses) + }) + # Makes predictions from a naive Bayes model or a model produced by spark.naiveBayes(), # similarly to R package e1071's predict. 
@@ -826,6 +881,20 @@ setMethod("write.ml", signature(object = "GeneralizedLinearRegressionModel", pat write_internal(object, path, overwrite) }) +# Saves the multinomial logistic regressionModel to the input path. + +#' @param path the directory where the model is saved. +#' @param overwrite overwrites or not if the output path already exists. Default is FALSE +#' which means throw exception if the output path exists. +#' +#' @rdname spark.glmnet +#' @export +#' @note write.ml(MultinomialLogisticRegressionModel, character) since 2.1.0 +setMethod("write.ml", signature(object = "MultinomialLogisticRegressionModel", path = "character"), + function(object, path, overwrite = FALSE) { + write_internal(object, path, overwrite) + }) + # Save fitted MLlib model to the input path #' @param path the directory where the model is saved. @@ -922,6 +991,8 @@ read.ml <- function(path) { new("GaussianMixtureModel", jobj = jobj) } else if (isInstanceOf(jobj, "org.apache.spark.ml.r.ALSWrapper")) { new("ALSModel", jobj = jobj) + } else if (isInstanceOf(jobj, "org.apache.spark.ml.r.MultinomialLogisticRegressionModelWrapper")) { + new("MultinomialLogisticRegressionModel", jobj = jobj) } else { stop("Unsupported model: ", jobj) } From eef5acd980578ce95e751ebb2010971886e39290 Mon Sep 17 00:00:00 2001 From: "wm624@hotmail.com" Date: Thu, 25 Aug 2016 16:10:50 -0700 Subject: [PATCH 2/4] rename wrapper --- R/pkg/R/mllib.R | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/R/pkg/R/mllib.R b/R/pkg/R/mllib.R index 35b97faa5b7f7..6c19beadb0b6b 100644 --- a/R/pkg/R/mllib.R +++ b/R/pkg/R/mllib.R @@ -34,7 +34,7 @@ setClass("GeneralizedLinearRegressionModel", representation(jobj = "jobj")) #' S4 class that represents a MultinomialLogisticRegressionModel #' -#' @param jobj a Java object reference to the backing Scala MultinomialLogisticRegressionModelWrapper +#' @param jobj a Java object reference to the backing Scala MultinomialLogisticRegressionModel #' @export #' @note 
GeneralizedLinearRegressionModel since 2.1.0 setClass("MultinomialLogisticRegressionModel", representation(jobj = "jobj")) @@ -336,7 +336,7 @@ setMethod("spark.glmnet", signature(data = "SparkDataFrame", formula = "formula" weightCol <- "" } - jobj <- callJStatic("org.apache.spark.ml.r.MultinomialLogisticRegressionModelWrapper", + jobj <- callJStatic("org.apache.spark.ml.r.MultinomialLogisticRegressionWrapper", "fit", formula, data@sdf, as.numeric(regParam), as.numeric(elasticNetParam), tol, as.integer(maxIter), as.logical(fitIntercept), as.logical(standardization), as.array(thresholds), as.character(weightCol)) @@ -991,7 +991,7 @@ read.ml <- function(path) { new("GaussianMixtureModel", jobj = jobj) } else if (isInstanceOf(jobj, "org.apache.spark.ml.r.ALSWrapper")) { new("ALSModel", jobj = jobj) - } else if (isInstanceOf(jobj, "org.apache.spark.ml.r.MultinomialLogisticRegressionModelWrapper")) { + } else if (isInstanceOf(jobj, "org.apache.spark.ml.r.MultinomialLogisticRegressionWrapper")) { new("MultinomialLogisticRegressionModel", jobj = jobj) } else { stop("Unsupported model: ", jobj) From 9f14338baa3bfd32b7b2e07f7cd28b56e4a2eea0 Mon Sep 17 00:00:00 2001 From: "wm624@hotmail.com" Date: Fri, 26 Aug 2016 09:40:25 -0700 Subject: [PATCH 3/4] back up change and hold the PR --- ...MultinomialLogisticRegressionWrapper.scala | 74 +++++++++++++++++++ 1 file changed, 74 insertions(+) create mode 100644 mllib/src/main/scala/org/apache/spark/ml/r/MultinomialLogisticRegressionWrapper.scala diff --git a/mllib/src/main/scala/org/apache/spark/ml/r/MultinomialLogisticRegressionWrapper.scala b/mllib/src/main/scala/org/apache/spark/ml/r/MultinomialLogisticRegressionWrapper.scala new file mode 100644 index 0000000000000..67e6258c52403 --- /dev/null +++ b/mllib/src/main/scala/org/apache/spark/ml/r/MultinomialLogisticRegressionWrapper.scala @@ -0,0 +1,74 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.ml.r + +import org.apache.hadoop.fs.Path +import org.json4s._ +import org.json4s.JsonDSL._ +import org.json4s.jackson.JsonMethods._ + +import org.apache.spark.ml.{Pipeline, PipelineModel} +import org.apache.spark.ml.attribute.AttributeGroup +import org.apache.spark.ml.classification.{MultinomialLogisticRegression, MultinomialLogisticRegressionModel} +import org.apache.spark.ml.feature.RFormula +import org.apache.spark.ml.linalg._ +import org.apache.spark.ml.util._ +import org.apache.spark.sql.{DataFrame, Dataset} + +private[r] class MultinomialLogisticRegressionWrapper private ( + val pipeline: PipelineModel, + val features: Array[String]) extends MLWritable { + + private val multinomialLogisticRegressionModel: MultinomialLogisticRegressionModel = + pipeline.stages(1).asInstanceOf[MultinomialLogisticRegressionModel] + + lazy val coefficients: Matrix = multinomialLogisticRegressionModel.coefficients + + lazy val intercepts: Vector = multinomialLogisticRegressionModel.intercepts + + lazy val numClasses: Int = multinomialLogisticRegressionModel.numClasses + + lazy val numFeatures: Int = multinomialLogisticRegressionModel.numFeatures + + override def write: MLWriter = + new 
MultinomialLogisticRegressionWrapper.MultinomialLogisticRegressionWrapperWriter(this) +} + +private[r] object MultinomialLogisticRegressionWrapper + extends MLReadable[MultinomialLogisticRegressionWrapper] { + def fit(): MultinomialLogisticRegressionWrapper = { + + } + + override def read: MLReader[MultinomialLogisticRegressionWrapper] = + new MultinomialLogisticRegressionWrapperReader + + override def load(path: String): MultinomialLogisticRegressionWrapper = super.load(path) + + class MultinomialLogisticRegressionWrapperWriter(instance: MultinomialLogisticRegressionWrapper) + extends MLWriter { + + override protected def saveImpl(path: String): Unit = ??? + } + + class MultinomialLogisticRegressionWrapperReader + extends MLReader[MultinomialLogisticRegressionWrapper] { + + override def load(path: String): MultinomialLogisticRegressionWrapper = ??? + } +} From ed1a0fb7cdb57e763c6318b30544506719593622 Mon Sep 17 00:00:00 2001 From: "wm624@hotmail.com" Date: Fri, 23 Sep 2016 10:33:05 -0700 Subject: [PATCH 4/4] delete unused scala --- R/pkg/NAMESPACE | 4 ++-- ...MultinomialLogisticRegressionWrapper.scala | 74 ------------------- 2 files changed, 2 insertions(+), 76 deletions(-) delete mode 100644 mllib/src/main/scala/org/apache/spark/ml/r/MultinomialLogisticRegressionWrapper.scala diff --git a/R/pkg/NAMESPACE b/R/pkg/NAMESPACE index af250d428e2be..38969efabc8ce 100644 --- a/R/pkg/NAMESPACE +++ b/R/pkg/NAMESPACE @@ -43,7 +43,7 @@ exportMethods("glm", "spark.isoreg", "spark.gaussianMixture", "spark.als", - "spark.kstest") - "spark.glmnet") + "spark.kstest", + "spark.glmnet") # Job group lifecycle management methods diff --git a/mllib/src/main/scala/org/apache/spark/ml/r/MultinomialLogisticRegressionWrapper.scala b/mllib/src/main/scala/org/apache/spark/ml/r/MultinomialLogisticRegressionWrapper.scala deleted file mode 100644 index 67e6258c52403..0000000000000 --- a/mllib/src/main/scala/org/apache/spark/ml/r/MultinomialLogisticRegressionWrapper.scala +++ /dev/null @@ -1,74 +0,0 @@ -/* - * 
Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.spark.ml.r - -import org.apache.hadoop.fs.Path -import org.json4s._ -import org.json4s.JsonDSL._ -import org.json4s.jackson.JsonMethods._ - -import org.apache.spark.ml.{Pipeline, PipelineModel} -import org.apache.spark.ml.attribute.AttributeGroup -import org.apache.spark.ml.classification.{MultinomialLogisticRegression, MultinomialLogisticRegressionModel} -import org.apache.spark.ml.feature.RFormula -import org.apache.spark.ml.linalg._ -import org.apache.spark.ml.util._ -import org.apache.spark.sql.{DataFrame, Dataset} - -private[r] class MultinomialLogisticRegressionWrapper private ( - val pipeline: PipelineModel, - val features: Array[String]) extends MLWritable { - - private val multinomialLogisticRegressionModel: MultinomialLogisticRegressionModel = - pipeline.stages(1).asInstanceOf[MultinomialLogisticRegressionModel] - - lazy val coefficients: Matrix = multinomialLogisticRegressionModel.coefficients - - lazy val intercepts: Vector = multinomialLogisticRegressionModel.intercepts - - lazy val numClasses: Int = multinomialLogisticRegressionModel.numClasses - - lazy val numFeatures: Int = multinomialLogisticRegressionModel.numFeatures - - override def 
write: MLWriter = - new MultinomialLogisticRegressionWrapper.MultinomialLogisticRegressionWrapperWriter(this) -} - -private[r] object MultinomialLogisticRegressionWrapper - extends MLReadable[MultinomialLogisticRegressionWrapper] { - def fit(): MultinomialLogisticRegressionWrapper = { - - } - - override def read: MLReader[MultinomialLogisticRegressionWrapper] = - new MultinomialLogisticRegressionWrapperReader - - override def load(path: String): MultinomialLogisticRegressionWrapper = super.load(path) - - class MultinomialLogisticRegressionWrapperWriter(instance: MultinomialLogisticRegressionWrapper) - extends MLWriter { - - override protected def saveImpl(path: String): Unit = ??? - } - - class MultinomialLogisticRegressionWrapperReader - extends MLReader[MultinomialLogisticRegressionWrapper] { - - override def load(path: String): MultinomialLogisticRegressionWrapper = ??? - } -}