From 2783d24c02b57e163691a9f4e2f4e0c55fc1ef08 Mon Sep 17 00:00:00 2001
From: "wm624@hotmail.com" <wm624@hotmail.com>
Date: Tue, 3 Jan 2017 15:26:28 -0800
Subject: [PATCH 1/9] fix the optimizer bug

---
 .../org/apache/spark/ml/r/LDAWrapper.scala    | 19 ++++++++++++++++++-
 1 file changed, 18 insertions(+), 1 deletion(-)

diff --git a/mllib/src/main/scala/org/apache/spark/ml/r/LDAWrapper.scala b/mllib/src/main/scala/org/apache/spark/ml/r/LDAWrapper.scala
index cbe6a705007d..3ecf804ba528 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/r/LDAWrapper.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/r/LDAWrapper.scala
@@ -26,7 +26,7 @@ import org.json4s.jackson.JsonMethods._
 
 import org.apache.spark.SparkException
 import org.apache.spark.ml.{Pipeline, PipelineModel, PipelineStage}
-import org.apache.spark.ml.clustering.{LDA, LDAModel}
+import org.apache.spark.ml.clustering.{DistributedLDAModel, LDA, LDAModel}
 import org.apache.spark.ml.feature.{CountVectorizer, CountVectorizerModel, RegexTokenizer, StopWordsRemover}
 import org.apache.spark.ml.linalg.{Vector, VectorUDT}
 import org.apache.spark.ml.param.ParamPair
@@ -45,6 +45,11 @@ private[r] class LDAWrapper private (
   import LDAWrapper._
 
   private val lda: LDAModel = pipeline.stages.last.asInstanceOf[LDAModel]
+  private val distributedMoel = lda.isDistributed match {
+    case true => pipeline.stages.last.asInstanceOf[DistributedLDAModel]
+    case _ => null
+  }
+
   private val preprocessor: PipelineModel =
     new PipelineModel(s"${Identifiable.randomUID(pipeline.uid)}", pipeline.stages.dropRight(1))
 
@@ -77,6 +82,14 @@ private[r] class LDAWrapper private (
   lazy val vocabSize: Int = lda.vocabSize
   lazy val docConcentration: Array[Double] = lda.getEffectiveDocConcentration
   lazy val topicConcentration: Double = lda.getEffectiveTopicConcentration
+  lazy val trainingLogLikelihood: Double = distributedMoel match {
+    case null => Double.NaN
+    case _ => distributedMoel.trainingLogLikelihood
+  }
+  lazy val logPrior: Double = distributedMoel match {
+    case null => Double.NaN
+    case _ => distributedMoel.logPrior
+  }
 
   override def write: MLWriter = new LDAWrapper.LDAWrapperWriter(this)
 }
@@ -123,6 +136,10 @@ private[r] object LDAWrapper extends MLReadable[LDAWrapper] {
       .setMaxIter(maxIter)
       .setSubsamplingRate(subsamplingRate)
 
+    if (optimizer == "em") {
+      lda.setOptimizer(optimizer)
+    }
+
     val featureSchema = data.schema(features)
     val stages = featureSchema.dataType match {
       case d: StringType =>

From 864aafa830f81a6d9a50d84eb3ebb2e668f92ea3 Mon Sep 17 00:00:00 2001
From: "wm624@hotmail.com" <wm624@hotmail.com>
Date: Fri, 6 Jan 2017 10:05:49 -0800
Subject: [PATCH 2/9] set optimizer anyway

---
 mllib/src/main/scala/org/apache/spark/ml/r/LDAWrapper.scala | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/mllib/src/main/scala/org/apache/spark/ml/r/LDAWrapper.scala b/mllib/src/main/scala/org/apache/spark/ml/r/LDAWrapper.scala
index 3ecf804ba528..a34c510221d9 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/r/LDAWrapper.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/r/LDAWrapper.scala
@@ -135,10 +135,7 @@ private[r] object LDAWrapper extends MLReadable[LDAWrapper] {
      .setK(k)
      .setMaxIter(maxIter)
      .setSubsamplingRate(subsamplingRate)
-
-    if (optimizer == "em") {
-      lda.setOptimizer(optimizer)
-    }
+      .setOptimizer(optimizer)
 
     val featureSchema = data.schema(features)
     val stages = featureSchema.dataType match {
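The optimizer choice in the two commits above is what decides the concrete model class the wrapper ends up holding, and therefore whether trainingLogLikelihood and logPrior exist at all. A minimal Scala sketch of that relationship, assuming a local Spark session and the sample_lda_libsvm_data.txt file shipped under Spark's data/mllib directory (the same file the tests below load):

    import org.apache.spark.ml.clustering.{DistributedLDAModel, LDA}
    import org.apache.spark.sql.SparkSession

    val spark = SparkSession.builder()
      .master("local[2]").appName("lda-optimizer-sketch").getOrCreate()
    val corpus = spark.read.format("libsvm").load("data/mllib/sample_lda_libsvm_data.txt")

    // "em" trains a DistributedLDAModel; "online" trains a LocalLDAModel.
    val emModel = new LDA().setK(3).setMaxIter(5).setOptimizer("em").fit(corpus)
    val onlineModel = new LDA().setK(3).setMaxIter(5).setOptimizer("online").fit(corpus)

    assert(emModel.isDistributed)      // distributed: the two extra metrics exist
    assert(!onlineModel.isDistributed) // local: they do not

    // Only the distributed model carries the metrics this series surfaces to R.
    val distModel = emModel.asInstanceOf[DistributedLDAModel]
    println(distModel.trainingLogLikelihood)
    println(distModel.logPrior)

    spark.stop()

Unconditionally forwarding the optimizer (the second commit) is simpler than the original guard, and it honors an explicit optimizer = "online" request instead of silently relying on the estimator's default.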
From 456e06da42e65a658d8625ea94d1ea4208b3ac62 Mon Sep 17 00:00:00 2001
From: "wm624@hotmail.com" <wm624@hotmail.com>
Date: Mon, 9 Jan 2017 13:28:13 -0800
Subject: [PATCH 3/9] resolve conflict

---
 R/pkg/R/mllib_clustering.R                      | 12 +++++++++++-
 .../inst/tests/testthat/test_mllib_clustering.R | 16 ++++++++++++++--
 R/pkg/inst/tests/testthat/test_mllib_tree.R     |  1 -
 3 files changed, 25 insertions(+), 4 deletions(-)

diff --git a/R/pkg/R/mllib_clustering.R b/R/pkg/R/mllib_clustering.R
index c44358838703..171d7157d70a 100644
--- a/R/pkg/R/mllib_clustering.R
+++ b/R/pkg/R/mllib_clustering.R
@@ -388,6 +388,13 @@ setMethod("spark.lda", signature(data = "SparkDataFrame"),
 #'         \item{\code{topics}}{top 10 terms and their weights of all topics}
 #'         \item{\code{vocabulary}}{whole terms of the training corpus, NULL if libsvm format file
 #'         used as training set}
+#'         \item{\code{trainingLogLikelihood}}{Log likelihood of the observed tokens in the training set,
+#'         given the current parameter estimates:
+#'         log P(docs | topics, topic distributions for docs, Dirichlet hyperparameters)
+#'         It is only for \code{DistributedLDAModel} (i.e., optimizer = "em")}
+#'         \item{\code{logPrior}}{Log probability of the current parameter estimate:
+#'         log P(topics, topic distributions for docs | Dirichlet hyperparameters)
+#'         It is only for \code{DistributedLDAModel} (i.e., optimizer = "em")}
 #' @rdname spark.lda
 #' @aliases summary,LDAModel-method
 #' @export
@@ -404,11 +411,14 @@ setMethod("summary", signature(object = "LDAModel"),
             vocabSize <- callJMethod(jobj, "vocabSize")
             topics <- dataFrame(callJMethod(jobj, "topics", maxTermsPerTopic))
             vocabulary <- callJMethod(jobj, "vocabulary")
+            trainingLogLikelihood <- callJMethod(jobj, "trainingLogLikelihood")
+            logPrior <- callJMethod(jobj, "logPrior")
             list(docConcentration = unlist(docConcentration),
                  topicConcentration = topicConcentration,
                  logLikelihood = logLikelihood, logPerplexity = logPerplexity,
                  isDistributed = isDistributed, vocabSize = vocabSize,
-                 topics = topics, vocabulary = unlist(vocabulary))
+                 topics = topics, vocabulary = unlist(vocabulary),
+                 trainingLogLikelihood = trainingLogLikelihood, logPrior = logPrior)
           })
 
 # Returns the log perplexity of a Latent Dirichlet Allocation model produced by \code{spark.lda}
diff --git a/R/pkg/inst/tests/testthat/test_mllib_clustering.R b/R/pkg/inst/tests/testthat/test_mllib_clustering.R
index 1980fffd80cc..530e817fdb22 100644
--- a/R/pkg/inst/tests/testthat/test_mllib_clustering.R
+++ b/R/pkg/inst/tests/testthat/test_mllib_clustering.R
@@ -146,12 +146,16 @@ test_that("spark.lda with libsvm", {
   topics <- stats$topicTopTerms
   weights <- stats$topicTopTermsWeights
   vocabulary <- stats$vocabulary
+  trainingLogLikelihood <- stats$trainingLogLikelihood
+  logPrior <- stats$logPrior
 
-  expect_false(isDistributed)
+  expect_true(isDistributed)
   expect_true(logLikelihood <= 0 & is.finite(logLikelihood))
   expect_true(logPerplexity >= 0 & is.finite(logPerplexity))
   expect_equal(vocabSize, 11)
   expect_true(is.null(vocabulary))
+  expect_true(trainingLogLikelihood <= 0 & !is.nan(trainingLogLikelihood))
+  expect_true(logPrior <= 0 & !is.nan(logPrior))
 
   # Test model save/load
   modelPath <- tempfile(pattern = "spark-lda", fileext = ".tmp")
@@ -161,11 +165,13 @@ test_that("spark.lda with libsvm", {
   model2 <- read.ml(modelPath)
   stats2 <- summary(model2)
 
-  expect_false(stats2$isDistributed)
+  expect_true(stats2$isDistributed)
   expect_equal(logLikelihood, stats2$logLikelihood)
   expect_equal(logPerplexity, stats2$logPerplexity)
   expect_equal(vocabSize, stats2$vocabSize)
   expect_equal(vocabulary, stats2$vocabulary)
+  expect_equal(trainingLogLikelihood, stats2$trainingLogLikelihood)
+  expect_equal(logPrior, stats2$logPrior)
 
   unlink(modelPath)
 })
@@ -182,12 +188,16 @@ test_that("spark.lda with text input", {
   topics <- stats$topicTopTerms
   weights <- stats$topicTopTermsWeights
   vocabulary <- stats$vocabulary
+  trainingLogLikelihood <- stats$trainingLogLikelihood
+  logPrior <- stats$logPrior
 
   expect_false(isDistributed)
   expect_true(logLikelihood <= 0 & is.finite(logLikelihood))
   expect_true(logPerplexity >= 0 & is.finite(logPerplexity))
   expect_equal(vocabSize, 10)
   expect_true(setequal(stats$vocabulary, c("0", "1", "2", "3", "4", "5", "6", "7", "8", "9")))
+  expect_true(is.nan(trainingLogLikelihood))
+  expect_true(is.nan(logPrior))
 
   # Test model save/load
   modelPath <- tempfile(pattern = "spark-lda-text", fileext = ".tmp")
@@ -202,6 +212,8 @@ test_that("spark.lda with text input", {
   expect_equal(logPerplexity, stats2$logPerplexity)
   expect_equal(vocabSize, stats2$vocabSize)
   expect_true(all.equal(vocabulary, stats2$vocabulary))
+  expect_equal(trainingLogLikelihood, stats2$trainingLogLikelihood)
+  expect_equal(logPrior, stats2$logPrior)
 
   unlink(modelPath)
 })
diff --git a/R/pkg/inst/tests/testthat/test_mllib_tree.R b/R/pkg/inst/tests/testthat/test_mllib_tree.R
index 5d13539be8a8..e6fda251ebea 100644
--- a/R/pkg/inst/tests/testthat/test_mllib_tree.R
+++ b/R/pkg/inst/tests/testthat/test_mllib_tree.R
@@ -126,7 +126,6 @@ test_that("spark.randomForest", {
                63.53160, 64.05470, 65.12710, 64.30450,
                66.70910, 67.86125, 68.08700, 67.21865,
                68.89275, 69.53180, 69.39640, 69.68250),
-               tolerance = 1e-4)
 
   stats <- summary(model)
   expect_equal(stats$numTrees, 20)
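For reference, the two fields documented in the patch above have compact probabilistic definitions. A hedged restatement in standard LDA notation — topics beta, per-document topic distributions theta, observed tokens w, Dirichlet hyperparameters alpha and eta; the symbols are ours, not Spark's:

    % trainingLogLikelihood: the observed tokens given the current estimates
    \log p(w \mid \beta, \theta, \alpha, \eta)
    % logPrior: the current estimates given the hyperparameters
    \log p(\beta, \theta \mid \alpha, \eta)
    % by the chain rule, their sum is the log joint probability
    \log p(w, \beta, \theta \mid \alpha, \eta)
        = \log p(w \mid \beta, \theta, \alpha, \eta) + \log p(\beta, \theta \mid \alpha, \eta)

The new assertions in the libsvm test expect both quantities to be non-positive, consistent with these being log probabilities of the training corpus.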
@@ test_that("spark.lda with text input", { topics <- stats$topicTopTerms weights <- stats$topicTopTermsWeights vocabulary <- stats$vocabulary + trainingLogLikelihood <- stats$trainingLogLikelihood + logPrior <- stats$logPrior expect_false(isDistributed) expect_true(logLikelihood <= 0 & is.finite(logLikelihood)) expect_true(logPerplexity >= 0 & is.finite(logPerplexity)) expect_equal(vocabSize, 10) expect_true(setequal(stats$vocabulary, c("0", "1", "2", "3", "4", "5", "6", "7", "8", "9"))) + expect_true(is.nan(trainingLogLikelihood)) + expect_true(is.nan(logPrior)) # Test model save/load modelPath <- tempfile(pattern = "spark-lda-text", fileext = ".tmp") @@ -202,6 +212,8 @@ test_that("spark.lda with text input", { expect_equal(logPerplexity, stats2$logPerplexity) expect_equal(vocabSize, stats2$vocabSize) expect_true(all.equal(vocabulary, stats2$vocabulary)) + expect_equal(trainingLogLikelihood, stats2$trainingLogLikelihood) + expect_equal(logPrior, stats2$logPrior) unlink(modelPath) }) diff --git a/R/pkg/inst/tests/testthat/test_mllib_tree.R b/R/pkg/inst/tests/testthat/test_mllib_tree.R index 5d13539be8a8..e6fda251ebea 100644 --- a/R/pkg/inst/tests/testthat/test_mllib_tree.R +++ b/R/pkg/inst/tests/testthat/test_mllib_tree.R @@ -126,7 +126,6 @@ test_that("spark.randomForest", { 63.53160, 64.05470, 65.12710, 64.30450, 66.70910, 67.86125, 68.08700, 67.21865, 68.89275, 69.53180, 69.39640, 69.68250), - tolerance = 1e-4) stats <- summary(model) expect_equal(stats$numTrees, 20) From aee8da5c770f0fef2b49f196e38723e92c28d677 Mon Sep 17 00:00:00 2001 From: "wm624@hotmail.com" Date: Tue, 10 Jan 2017 11:16:31 -0800 Subject: [PATCH 4/9] fix typo --- .../main/scala/org/apache/spark/ml/r/LDAWrapper.scala | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/mllib/src/main/scala/org/apache/spark/ml/r/LDAWrapper.scala b/mllib/src/main/scala/org/apache/spark/ml/r/LDAWrapper.scala index a34c510221d9..4b254ad2b280 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/r/LDAWrapper.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/r/LDAWrapper.scala @@ -45,7 +45,7 @@ private[r] class LDAWrapper private ( import LDAWrapper._ private val lda: LDAModel = pipeline.stages.last.asInstanceOf[LDAModel] - private val distributedMoel = lda.isDistributed match { + private val distributedModel = lda.isDistributed match { case true => pipeline.stages.last.asInstanceOf[DistributedLDAModel] case _ => null } @@ -82,13 +82,13 @@ private[r] class LDAWrapper private ( lazy val vocabSize: Int = lda.vocabSize lazy val docConcentration: Array[Double] = lda.getEffectiveDocConcentration lazy val topicConcentration: Double = lda.getEffectiveTopicConcentration - lazy val trainingLogLikelihood: Double = distributedMoel match { + lazy val trainingLogLikelihood: Double = distributedModel match { case null => Double.NaN - case _ => distributedMoel.trainingLogLikelihood + case _ => distributedModel.trainingLogLikelihood } - lazy val logPrior: Double = distributedMoel match { + lazy val logPrior: Double = distributedModel match { case null => Double.NaN - case _ => distributedMoel.logPrior + case _ => distributedModel.logPrior } override def write: MLWriter = new LDAWrapper.LDAWrapperWriter(this) From 0134a2693f6abfc51d0c11d693b97971072affaa Mon Sep 17 00:00:00 2001 From: "wm624@hotmail.com" Date: Thu, 12 Jan 2017 23:28:42 -0800 Subject: [PATCH 5/9] address review comments --- R/pkg/R/mllib_clustering.R | 12 ++++++++++-- R/pkg/inst/tests/testthat/test_mllib_clustering.R | 8 ++++---- 
From 0134a2693f6abfc51d0c11d693b97971072affaa Mon Sep 17 00:00:00 2001
From: "wm624@hotmail.com" <wm624@hotmail.com>
Date: Thu, 12 Jan 2017 23:28:42 -0800
Subject: [PATCH 5/9] address review comments

---
 R/pkg/R/mllib_clustering.R                        | 12 ++++++++++--
 R/pkg/inst/tests/testthat/test_mllib_clustering.R |  8 ++++----
 .../scala/org/apache/spark/ml/r/LDAWrapper.scala  | 13 +++++--------
 3 files changed, 19 insertions(+), 14 deletions(-)

diff --git a/R/pkg/R/mllib_clustering.R b/R/pkg/R/mllib_clustering.R
index 171d7157d70a..1c8f72185fc6 100644
--- a/R/pkg/R/mllib_clustering.R
+++ b/R/pkg/R/mllib_clustering.R
@@ -411,8 +411,16 @@ setMethod("summary", signature(object = "LDAModel"),
             vocabSize <- callJMethod(jobj, "vocabSize")
             topics <- dataFrame(callJMethod(jobj, "topics", maxTermsPerTopic))
             vocabulary <- callJMethod(jobj, "vocabulary")
-            trainingLogLikelihood <- callJMethod(jobj, "trainingLogLikelihood")
-            logPrior <- callJMethod(jobj, "logPrior")
+            trainingLogLikelihood <- if (isDistributed) {
+              callJMethod(jobj, "trainingLogLikelihood")
+            } else {
+              NULL
+            }
+            logPrior <- if (isDistributed) {
+              callJMethod(jobj, "logPrior")
+            } else {
+              NULL
+            }
             list(docConcentration = unlist(docConcentration),
                  topicConcentration = topicConcentration,
                  logLikelihood = logLikelihood, logPerplexity = logPerplexity,
diff --git a/R/pkg/inst/tests/testthat/test_mllib_clustering.R b/R/pkg/inst/tests/testthat/test_mllib_clustering.R
index 530e817fdb22..174e3cb48a14 100644
--- a/R/pkg/inst/tests/testthat/test_mllib_clustering.R
+++ b/R/pkg/inst/tests/testthat/test_mllib_clustering.R
@@ -196,8 +196,8 @@ test_that("spark.lda with text input", {
   expect_true(logPerplexity >= 0 & is.finite(logPerplexity))
   expect_equal(vocabSize, 10)
   expect_true(setequal(stats$vocabulary, c("0", "1", "2", "3", "4", "5", "6", "7", "8", "9")))
-  expect_true(is.nan(trainingLogLikelihood))
-  expect_true(is.nan(logPrior))
+  expect_true(is.null(trainingLogLikelihood))
+  expect_true(is.null(logPrior))
 
   # Test model save/load
   modelPath <- tempfile(pattern = "spark-lda-text", fileext = ".tmp")
@@ -212,8 +212,8 @@ test_that("spark.lda with text input", {
   expect_equal(logPerplexity, stats2$logPerplexity)
   expect_equal(vocabSize, stats2$vocabSize)
   expect_true(all.equal(vocabulary, stats2$vocabulary))
-  expect_equal(trainingLogLikelihood, stats2$trainingLogLikelihood)
-  expect_equal(logPrior, stats2$logPrior)
+  expect_true(is.null(stats2$trainingLogLikelihood))
+  expect_true(is.null(stats2$logPrior))
 
   unlink(modelPath)
 })
diff --git a/mllib/src/main/scala/org/apache/spark/ml/r/LDAWrapper.scala b/mllib/src/main/scala/org/apache/spark/ml/r/LDAWrapper.scala
index 4b254ad2b280..555f6948c862 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/r/LDAWrapper.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/r/LDAWrapper.scala
@@ -82,14 +82,11 @@ private[r] class LDAWrapper private (
   lazy val vocabSize: Int = lda.vocabSize
   lazy val docConcentration: Array[Double] = lda.getEffectiveDocConcentration
   lazy val topicConcentration: Double = lda.getEffectiveTopicConcentration
-  lazy val trainingLogLikelihood: Double = distributedModel match {
-    case null => Double.NaN
-    case _ => distributedModel.trainingLogLikelihood
-  }
-  lazy val logPrior: Double = distributedModel match {
-    case null => Double.NaN
-    case _ => distributedModel.logPrior
-  }
+  // Only applicable to distributed lda model
+  lazy val trainingLogLikelihood: Double = distributedModel.trainingLogLikelihood
+
+  // Only applicable to distributed lda model
+  lazy val logPrior: Double = distributedModel.logPrior
 
   override def write: MLWriter = new LDAWrapper.LDAWrapperWriter(this)
 }
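The commit above moves the isDistributed guard to the R side and leaves plain definitions in Scala. That works because a lazy val defers its initializer — including an unsafe cast — until the value is first accessed. A self-contained sketch of that mechanic, using hypothetical stand-ins for Spark's model hierarchy rather than the real classes:

    // Hypothetical stand-ins for LDAModel / DistributedLDAModel.
    sealed trait Model { def isDistributed: Boolean }
    class LocalModel extends Model { val isDistributed = false }
    class DistModel extends Model {
      val isDistributed = true
      def trainingLogLikelihood: Double = -123.4 // dummy value
    }

    class Wrapper(model: Model) {
      // The cast runs only when the lazy val is first forced.
      private lazy val distModel = model.asInstanceOf[DistModel]
      lazy val trainingLogLikelihood: Double = distModel.trainingLogLikelihood
    }

    val w = new Wrapper(new LocalModel) // fine: nothing is forced yet
    // w.trainingLogLikelihood         // would throw ClassCastException here,
                                       // which is why R checks isDistributed first

So constructing the wrapper around a local model is always safe; only a caller that skips the isDistributed check can trigger the failed cast.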
From b72592ce02e9a8af518a103ab81a2dfe8a103d51 Mon Sep 17 00:00:00 2001
From: "wm624@hotmail.com" <wm624@hotmail.com>
Date: Thu, 12 Jan 2017 23:36:36 -0800
Subject: [PATCH 6/9] simplify backend code

---
 R/pkg/R/mllib_clustering.R                       |  4 ++--
 .../scala/org/apache/spark/ml/r/LDAWrapper.scala | 15 ++++++---------
 2 files changed, 8 insertions(+), 11 deletions(-)

diff --git a/R/pkg/R/mllib_clustering.R b/R/pkg/R/mllib_clustering.R
index 1c8f72185fc6..09001612f598 100644
--- a/R/pkg/R/mllib_clustering.R
+++ b/R/pkg/R/mllib_clustering.R
@@ -391,10 +391,10 @@ setMethod("spark.lda", signature(data = "SparkDataFrame"),
 #'         \item{\code{trainingLogLikelihood}}{Log likelihood of the observed tokens in the training set,
 #'         given the current parameter estimates:
 #'         log P(docs | topics, topic distributions for docs, Dirichlet hyperparameters)
-#'         It is only for \code{DistributedLDAModel} (i.e., optimizer = "em")}
+#'         It is only for distributed LDA model (i.e., optimizer = "em")}
 #'         \item{\code{logPrior}}{Log probability of the current parameter estimate:
 #'         log P(topics, topic distributions for docs | Dirichlet hyperparameters)
-#'         It is only for \code{DistributedLDAModel} (i.e., optimizer = "em")}
+#'         It is only for distributed LDA model (i.e., optimizer = "em")}
 #' @rdname spark.lda
 #' @aliases summary,LDAModel-method
 #' @export
diff --git a/mllib/src/main/scala/org/apache/spark/ml/r/LDAWrapper.scala b/mllib/src/main/scala/org/apache/spark/ml/r/LDAWrapper.scala
index 555f6948c862..cae7ddcc5343 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/r/LDAWrapper.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/r/LDAWrapper.scala
@@ -45,10 +45,12 @@ private[r] class LDAWrapper private (
   import LDAWrapper._
 
   private val lda: LDAModel = pipeline.stages.last.asInstanceOf[LDAModel]
-  private val distributedModel = lda.isDistributed match {
-    case true => pipeline.stages.last.asInstanceOf[DistributedLDAModel]
-    case _ => null
-  }
+
+  // Only applicable to distributed lda model
+  lazy private val distributedModel =
+    pipeline.stages.last.asInstanceOf[DistributedLDAModel]
+  lazy val trainingLogLikelihood: Double = distributedModel.trainingLogLikelihood
+  lazy val logPrior: Double = distributedModel.logPrior
 
   private val preprocessor: PipelineModel =
     new PipelineModel(s"${Identifiable.randomUID(pipeline.uid)}", pipeline.stages.dropRight(1))
@@ -82,11 +84,6 @@ private[r] class LDAWrapper private (
   lazy val vocabSize: Int = lda.vocabSize
   lazy val docConcentration: Array[Double] = lda.getEffectiveDocConcentration
   lazy val topicConcentration: Double = lda.getEffectiveTopicConcentration
-  // Only applicable to distributed lda model
-  lazy val trainingLogLikelihood: Double = distributedModel.trainingLogLikelihood
-
-  // Only applicable to distributed lda model
-  lazy val logPrior: Double = distributedModel.logPrior
 
   override def write: MLWriter = new LDAWrapper.LDAWrapperWriter(this)
 }
From 882c70da32756e7603bd293b2ba010a585fdc0c5 Mon Sep 17 00:00:00 2001
From: "wm624@hotmail.com" <wm624@hotmail.com>
Date: Thu, 12 Jan 2017 23:39:59 -0800
Subject: [PATCH 7/9] improve comment

---
 mllib/src/main/scala/org/apache/spark/ml/r/LDAWrapper.scala | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mllib/src/main/scala/org/apache/spark/ml/r/LDAWrapper.scala b/mllib/src/main/scala/org/apache/spark/ml/r/LDAWrapper.scala
index cae7ddcc5343..e096bf1f29f3 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/r/LDAWrapper.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/r/LDAWrapper.scala
@@ -46,7 +46,7 @@ private[r] class LDAWrapper private (
 
   private val lda: LDAModel = pipeline.stages.last.asInstanceOf[LDAModel]
 
-  // Only applicable to distributed lda model
+  // The following variables were called by R side code only when the LDA model is distributed
   lazy private val distributedModel =
     pipeline.stages.last.asInstanceOf[DistributedLDAModel]
   lazy val trainingLogLikelihood: Double = distributedModel.trainingLogLikelihood

From 95a69106ca52844bafdf820b50ed8353d6c80a25 Mon Sep 17 00:00:00 2001
From: "wm624@hotmail.com" <wm624@hotmail.com>
Date: Sat, 14 Jan 2017 13:55:27 -0800
Subject: [PATCH 8/9] change NULL to NA

---
 R/pkg/R/mllib_clustering.R                        | 4 ++--
 R/pkg/inst/tests/testthat/test_mllib_clustering.R | 8 ++++----
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/R/pkg/R/mllib_clustering.R b/R/pkg/R/mllib_clustering.R
index 09001612f598..c63cb4da8166 100644
--- a/R/pkg/R/mllib_clustering.R
+++ b/R/pkg/R/mllib_clustering.R
@@ -414,12 +414,12 @@ setMethod("summary", signature(object = "LDAModel"),
             trainingLogLikelihood <- if (isDistributed) {
               callJMethod(jobj, "trainingLogLikelihood")
             } else {
-              NULL
+              NA
             }
             logPrior <- if (isDistributed) {
               callJMethod(jobj, "logPrior")
             } else {
-              NULL
+              NA
             }
             list(docConcentration = unlist(docConcentration),
                  topicConcentration = topicConcentration,
diff --git a/R/pkg/inst/tests/testthat/test_mllib_clustering.R b/R/pkg/inst/tests/testthat/test_mllib_clustering.R
index 174e3cb48a14..328a0ade5187 100644
--- a/R/pkg/inst/tests/testthat/test_mllib_clustering.R
+++ b/R/pkg/inst/tests/testthat/test_mllib_clustering.R
@@ -196,8 +196,8 @@ test_that("spark.lda with text input", {
   expect_true(logPerplexity >= 0 & is.finite(logPerplexity))
   expect_equal(vocabSize, 10)
   expect_true(setequal(stats$vocabulary, c("0", "1", "2", "3", "4", "5", "6", "7", "8", "9")))
-  expect_true(is.null(trainingLogLikelihood))
-  expect_true(is.null(logPrior))
+  expect_true(is.na(trainingLogLikelihood))
+  expect_true(is.na(logPrior))
 
   # Test model save/load
   modelPath <- tempfile(pattern = "spark-lda-text", fileext = ".tmp")
@@ -212,8 +212,8 @@ test_that("spark.lda with text input", {
   expect_equal(logPerplexity, stats2$logPerplexity)
   expect_equal(vocabSize, stats2$vocabSize)
   expect_true(all.equal(vocabulary, stats2$vocabulary))
-  expect_true(is.null(stats2$trainingLogLikelihood))
-  expect_true(is.null(stats2$logPrior))
+  expect_true(is.na(stats2$trainingLogLikelihood))
+  expect_true(is.na(stats2$logPrior))
 
   unlink(modelPath)
 })

From e133ee64961beaf10ccccb7885ece76ded021ae5 Mon Sep 17 00:00:00 2001
From: "wm624@hotmail.com" <wm624@hotmail.com>
Date: Sun, 15 Jan 2017 21:00:17 -0800
Subject: [PATCH 9/9] address review comments

---
 R/pkg/inst/tests/testthat/test_mllib_clustering.R | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/R/pkg/inst/tests/testthat/test_mllib_clustering.R b/R/pkg/inst/tests/testthat/test_mllib_clustering.R
index 328a0ade5187..b3635c4484b1 100644
--- a/R/pkg/inst/tests/testthat/test_mllib_clustering.R
+++ b/R/pkg/inst/tests/testthat/test_mllib_clustering.R
@@ -154,8 +154,8 @@ test_that("spark.lda with libsvm", {
   expect_true(logPerplexity >= 0 & is.finite(logPerplexity))
   expect_equal(vocabSize, 11)
   expect_true(is.null(vocabulary))
-  expect_true(trainingLogLikelihood <= 0 & !is.nan(trainingLogLikelihood))
-  expect_true(logPrior <= 0 & !is.nan(logPrior))
+  expect_true(trainingLogLikelihood <= 0 & !is.na(trainingLogLikelihood))
+  expect_true(logPrior <= 0 & !is.na(logPrior))
 
   # Test model save/load
   modelPath <- tempfile(pattern = "spark-lda", fileext = ".tmp")
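After the final two commits the contract is: with optimizer = "em" the summary carries finite, non-positive trainingLogLikelihood and logPrior values; with any other optimizer both come back as NA, which is R's idiom for a missing value of a field that conceptually exists (and is presumably why review preferred it over NULL). Since R's is.na() is also TRUE for NaN, switching the libsvm assertions from !is.nan() to !is.na() strictly strengthens them. A hedged Scala sketch of the equivalent caller-side logic — the helper name is ours, the Spark API calls are real:

    import org.apache.spark.ml.clustering.{DistributedLDAModel, LDAModel}

    // Mirrors what the R summary() now does through callJMethod: consult
    // isDistributed first and fetch the metrics only when they exist;
    // None plays the role of R's NA.
    def distributedMetrics(model: LDAModel): Option[(Double, Double)] =
      if (model.isDistributed) {
        val dm = model.asInstanceOf[DistributedLDAModel]
        Some((dm.trainingLogLikelihood, dm.logPrior))
      } else {
        None
      }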