From 2783d24c02b57e163691a9f4e2f4e0c55fc1ef08 Mon Sep 17 00:00:00 2001
From: "wm624@hotmail.com" <wm624@hotmail.com>
Date: Tue, 3 Jan 2017 15:26:28 -0800
Subject: [PATCH 1/9] fix the optimizer bug

---
 .../org/apache/spark/ml/r/LDAWrapper.scala    | 19 ++++++++++++++++++-
 1 file changed, 18 insertions(+), 1 deletion(-)

diff --git a/mllib/src/main/scala/org/apache/spark/ml/r/LDAWrapper.scala b/mllib/src/main/scala/org/apache/spark/ml/r/LDAWrapper.scala
index cbe6a705007d..3ecf804ba528 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/r/LDAWrapper.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/r/LDAWrapper.scala
@@ -26,7 +26,7 @@ import org.json4s.jackson.JsonMethods._
 
 import org.apache.spark.SparkException
 import org.apache.spark.ml.{Pipeline, PipelineModel, PipelineStage}
-import org.apache.spark.ml.clustering.{LDA, LDAModel}
+import org.apache.spark.ml.clustering.{DistributedLDAModel, LDA, LDAModel}
 import org.apache.spark.ml.feature.{CountVectorizer, CountVectorizerModel, RegexTokenizer, StopWordsRemover}
 import org.apache.spark.ml.linalg.{Vector, VectorUDT}
 import org.apache.spark.ml.param.ParamPair
@@ -45,6 +45,11 @@ private[r] class LDAWrapper private (
   import LDAWrapper._
 
   private val lda: LDAModel = pipeline.stages.last.asInstanceOf[LDAModel]
+  private val distributedMoel = lda.isDistributed match {
+    case true => pipeline.stages.last.asInstanceOf[DistributedLDAModel]
+    case _ => null
+  }
+
   private val preprocessor: PipelineModel =
     new PipelineModel(s"${Identifiable.randomUID(pipeline.uid)}", pipeline.stages.dropRight(1))
 
@@ -77,6 +82,14 @@ private[r] class LDAWrapper private (
   lazy val vocabSize: Int = lda.vocabSize
   lazy val docConcentration: Array[Double] = lda.getEffectiveDocConcentration
   lazy val topicConcentration: Double = lda.getEffectiveTopicConcentration
+  lazy val trainingLogLikelihood: Double = distributedMoel match {
+    case null => Double.NaN
+    case _ => distributedMoel.trainingLogLikelihood
+  }
+  lazy val logPrior: Double = distributedMoel match {
+    case null => Double.NaN
+    case _ => distributedMoel.logPrior
+  }
 
   override def write: MLWriter = new LDAWrapper.LDAWrapperWriter(this)
 }
@@ -123,6 +136,10 @@ private[r] object LDAWrapper extends MLReadable[LDAWrapper] {
       .setMaxIter(maxIter)
       .setSubsamplingRate(subsamplingRate)
 
+    if (optimizer == "em") {
+      lda.setOptimizer(optimizer)
+    }
+
     val featureSchema = data.schema(features)
     val stages = featureSchema.dataType match {
       case d: StringType =>

From 864aafa830f81a6d9a50d84eb3ebb2e668f92ea3 Mon Sep 17 00:00:00 2001
From: "wm624@hotmail.com" <wm624@hotmail.com>
Date: Fri, 6 Jan 2017 10:05:49 -0800
Subject: [PATCH 2/9] set optimizer anyway

---
 mllib/src/main/scala/org/apache/spark/ml/r/LDAWrapper.scala | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/mllib/src/main/scala/org/apache/spark/ml/r/LDAWrapper.scala b/mllib/src/main/scala/org/apache/spark/ml/r/LDAWrapper.scala
index 3ecf804ba528..a34c510221d9 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/r/LDAWrapper.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/r/LDAWrapper.scala
@@ -135,10 +135,7 @@ private[r] object LDAWrapper extends MLReadable[LDAWrapper] {
      .setK(k)
      .setMaxIter(maxIter)
      .setSubsamplingRate(subsamplingRate)
-
-    if (optimizer == "em") {
-      lda.setOptimizer(optimizer)
-    }
+      .setOptimizer(optimizer)
 
     val featureSchema = data.schema(features)
     val stages = featureSchema.dataType match {
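The optimizer choice in the two commits above is what decides the concrete model class the wrapper ends up holding, and therefore whether trainingLogLikelihood and logPrior exist at all. A minimal Scala sketch of that relationship, assuming a local Spark session and the sample_lda_libsvm_data.txt file shipped under Spark's data/mllib directory (the same file the tests below load):

    import org.apache.spark.ml.clustering.{DistributedLDAModel, LDA}
    import org.apache.spark.sql.SparkSession

    val spark = SparkSession.builder()
      .master("local[2]").appName("lda-optimizer-sketch").getOrCreate()
    val corpus = spark.read.format("libsvm").load("data/mllib/sample_lda_libsvm_data.txt")

    // "em" trains a DistributedLDAModel; "online" trains a LocalLDAModel.
    val emModel = new LDA().setK(3).setMaxIter(5).setOptimizer("em").fit(corpus)
    val onlineModel = new LDA().setK(3).setMaxIter(5).setOptimizer("online").fit(corpus)

    assert(emModel.isDistributed)      // distributed: the two extra metrics exist
    assert(!onlineModel.isDistributed) // local: they do not

    // Only the distributed model carries the metrics this series surfaces to R.
    val distModel = emModel.asInstanceOf[DistributedLDAModel]
    println(distModel.trainingLogLikelihood)
    println(distModel.logPrior)

    spark.stop()

Unconditionally forwarding the optimizer (the second commit) is simpler than the original guard, and it honors an explicit optimizer = "online" request instead of silently relying on the estimator's default.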
From 456e06da42e65a658d8625ea94d1ea4208b3ac62 Mon Sep 17 00:00:00 2001
From: "wm624@hotmail.com" <wm624@hotmail.com>
Date: Mon, 9 Jan 2017 13:28:13 -0800
Subject: [PATCH 3/9] resolve conflict

---
 R/pkg/R/mllib_clustering.R                      | 12 +++++++++++-
 .../inst/tests/testthat/test_mllib_clustering.R | 16 ++++++++++++++--
 R/pkg/inst/tests/testthat/test_mllib_tree.R     |  1 -
 3 files changed, 25 insertions(+), 4 deletions(-)

diff --git a/R/pkg/R/mllib_clustering.R b/R/pkg/R/mllib_clustering.R
index c44358838703..171d7157d70a 100644
--- a/R/pkg/R/mllib_clustering.R
+++ b/R/pkg/R/mllib_clustering.R
@@ -388,6 +388,13 @@ setMethod("spark.lda", signature(data = "SparkDataFrame"),
 #'         \item{\code{topics}}{top 10 terms and their weights of all topics}
 #'         \item{\code{vocabulary}}{whole terms of the training corpus, NULL if libsvm format file
 #'         used as training set}
+#'         \item{\code{trainingLogLikelihood}}{Log likelihood of the observed tokens in the training set,
+#'         given the current parameter estimates:
+#'         log P(docs | topics, topic distributions for docs, Dirichlet hyperparameters)
+#'         It is only for \code{DistributedLDAModel} (i.e., optimizer = "em")}
+#'         \item{\code{logPrior}}{Log probability of the current parameter estimate:
+#'         log P(topics, topic distributions for docs | Dirichlet hyperparameters)
+#'         It is only for \code{DistributedLDAModel} (i.e., optimizer = "em")}
 #' @rdname spark.lda
 #' @aliases summary,LDAModel-method
 #' @export
@@ -404,11 +411,14 @@ setMethod("summary", signature(object = "LDAModel"),
             vocabSize <- callJMethod(jobj, "vocabSize")
             topics <- dataFrame(callJMethod(jobj, "topics", maxTermsPerTopic))
             vocabulary <- callJMethod(jobj, "vocabulary")
+            trainingLogLikelihood <- callJMethod(jobj, "trainingLogLikelihood")
+            logPrior <- callJMethod(jobj, "logPrior")
             list(docConcentration = unlist(docConcentration),
                  topicConcentration = topicConcentration,
                  logLikelihood = logLikelihood, logPerplexity = logPerplexity,
                  isDistributed = isDistributed, vocabSize = vocabSize,
-                 topics = topics, vocabulary = unlist(vocabulary))
+                 topics = topics, vocabulary = unlist(vocabulary),
+                 trainingLogLikelihood = trainingLogLikelihood, logPrior = logPrior)
           })
 
 # Returns the log perplexity of a Latent Dirichlet Allocation model produced by \code{spark.lda}
diff --git a/R/pkg/inst/tests/testthat/test_mllib_clustering.R b/R/pkg/inst/tests/testthat/test_mllib_clustering.R
index 1980fffd80cc..530e817fdb22 100644
--- a/R/pkg/inst/tests/testthat/test_mllib_clustering.R
+++ b/R/pkg/inst/tests/testthat/test_mllib_clustering.R
@@ -146,12 +146,16 @@ test_that("spark.lda with libsvm", {
   topics <- stats$topicTopTerms
   weights <- stats$topicTopTermsWeights
   vocabulary <- stats$vocabulary
+  trainingLogLikelihood <- stats$trainingLogLikelihood
+  logPrior <- stats$logPrior
 
-  expect_false(isDistributed)
+  expect_true(isDistributed)
   expect_true(logLikelihood <= 0 & is.finite(logLikelihood))
   expect_true(logPerplexity >= 0 & is.finite(logPerplexity))
   expect_equal(vocabSize, 11)
   expect_true(is.null(vocabulary))
+  expect_true(trainingLogLikelihood <= 0 & !is.nan(trainingLogLikelihood))
+  expect_true(logPrior <= 0 & !is.nan(logPrior))
 
   # Test model save/load
   modelPath <- tempfile(pattern = "spark-lda", fileext = ".tmp")
@@ -161,11 +165,13 @@ test_that("spark.lda with libsvm", {
   model2 <- read.ml(modelPath)
   stats2 <- summary(model2)
 
-  expect_false(stats2$isDistributed)
+  expect_true(stats2$isDistributed)
   expect_equal(logLikelihood, stats2$logLikelihood)
   expect_equal(logPerplexity, stats2$logPerplexity)
   expect_equal(vocabSize, stats2$vocabSize)
   expect_equal(vocabulary, stats2$vocabulary)
+  expect_equal(trainingLogLikelihood, stats2$trainingLogLikelihood)
+  expect_equal(logPrior, stats2$logPrior)
 
   unlink(modelPath)
 })
@@ -182,12 +188,16 @@ test_that("spark.lda with text input", {
   topics <- stats$topicTopTerms
   weights <- stats$topicTopTermsWeights
   vocabulary <- stats$vocabulary
+  trainingLogLikelihood <- stats$trainingLogLikelihood
+  logPrior <- stats$logPrior
 
   expect_false(isDistributed)
   expect_true(logLikelihood <= 0 & is.finite(logLikelihood))
   expect_true(logPerplexity >= 0 & is.finite(logPerplexity))
   expect_equal(vocabSize, 10)
   expect_true(setequal(stats$vocabulary, c("0", "1", "2", "3", "4", "5", "6", "7", "8", "9")))
+  expect_true(is.nan(trainingLogLikelihood))
+  expect_true(is.nan(logPrior))
 
   # Test model save/load
   modelPath <- tempfile(pattern = "spark-lda-text", fileext = ".tmp")
@@ -202,6 +212,8 @@ test_that("spark.lda with text input", {
   expect_equal(logPerplexity, stats2$logPerplexity)
   expect_equal(vocabSize, stats2$vocabSize)
   expect_true(all.equal(vocabulary, stats2$vocabulary))
+  expect_equal(trainingLogLikelihood, stats2$trainingLogLikelihood)
+  expect_equal(logPrior, stats2$logPrior)
 
   unlink(modelPath)
 })
diff --git a/R/pkg/inst/tests/testthat/test_mllib_tree.R b/R/pkg/inst/tests/testthat/test_mllib_tree.R
index 5d13539be8a8..e6fda251ebea 100644
--- a/R/pkg/inst/tests/testthat/test_mllib_tree.R
+++ b/R/pkg/inst/tests/testthat/test_mllib_tree.R
@@ -126,7 +126,6 @@ test_that("spark.randomForest", {
                63.53160, 64.05470, 65.12710, 64.30450,
                66.70910, 67.86125, 68.08700, 67.21865,
                68.89275, 69.53180, 69.39640, 69.68250),
-               tolerance = 1e-4)
 
   stats <- summary(model)
   expect_equal(stats$numTrees, 20)
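For reference, the two fields documented in the patch above have compact probabilistic definitions. A hedged restatement in standard LDA notation — topics beta, per-document topic distributions theta, observed tokens w, Dirichlet hyperparameters alpha and eta; the symbols are ours, not Spark's:

    % trainingLogLikelihood: the observed tokens given the current estimates
    \log p(w \mid \beta, \theta, \alpha, \eta)
    % logPrior: the current estimates given the hyperparameters
    \log p(\beta, \theta \mid \alpha, \eta)
    % by the chain rule, their sum is the log joint probability
    \log p(w, \beta, \theta \mid \alpha, \eta)
        = \log p(w \mid \beta, \theta, \alpha, \eta) + \log p(\beta, \theta \mid \alpha, \eta)

The new assertions in the libsvm test expect both quantities to be non-positive, consistent with these being log probabilities of the training corpus.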
@@ test_that("spark.lda with text input", { topics <- stats$topicTopTerms weights <- stats$topicTopTermsWeights vocabulary <- stats$vocabulary + trainingLogLikelihood <- stats$trainingLogLikelihood + logPrior <- stats$logPrior expect_false(isDistributed) expect_true(logLikelihood <= 0 & is.finite(logLikelihood)) expect_true(logPerplexity >= 0 & is.finite(logPerplexity)) expect_equal(vocabSize, 10) expect_true(setequal(stats$vocabulary, c("0", "1", "2", "3", "4", "5", "6", "7", "8", "9"))) + expect_true(is.nan(trainingLogLikelihood)) + expect_true(is.nan(logPrior)) # Test model save/load modelPath <- tempfile(pattern = "spark-lda-text", fileext = ".tmp") @@ -202,6 +212,8 @@ test_that("spark.lda with text input", { expect_equal(logPerplexity, stats2$logPerplexity) expect_equal(vocabSize, stats2$vocabSize) expect_true(all.equal(vocabulary, stats2$vocabulary)) + expect_equal(trainingLogLikelihood, stats2$trainingLogLikelihood) + expect_equal(logPrior, stats2$logPrior) unlink(modelPath) }) diff --git a/R/pkg/inst/tests/testthat/test_mllib_tree.R b/R/pkg/inst/tests/testthat/test_mllib_tree.R index 5d13539be8a8..e6fda251ebea 100644 --- a/R/pkg/inst/tests/testthat/test_mllib_tree.R +++ b/R/pkg/inst/tests/testthat/test_mllib_tree.R @@ -126,7 +126,6 @@ test_that("spark.randomForest", { 63.53160, 64.05470, 65.12710, 64.30450, 66.70910, 67.86125, 68.08700, 67.21865, 68.89275, 69.53180, 69.39640, 69.68250), - tolerance = 1e-4) stats <- summary(model) expect_equal(stats$numTrees, 20) From aee8da5c770f0fef2b49f196e38723e92c28d677 Mon Sep 17 00:00:00 2001 From: "wm624@hotmail.com" Date: Tue, 10 Jan 2017 11:16:31 -0800 Subject: [PATCH 4/9] fix typo --- .../main/scala/org/apache/spark/ml/r/LDAWrapper.scala | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/mllib/src/main/scala/org/apache/spark/ml/r/LDAWrapper.scala b/mllib/src/main/scala/org/apache/spark/ml/r/LDAWrapper.scala index a34c510221d9..4b254ad2b280 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/r/LDAWrapper.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/r/LDAWrapper.scala @@ -45,7 +45,7 @@ private[r] class LDAWrapper private ( import LDAWrapper._ private val lda: LDAModel = pipeline.stages.last.asInstanceOf[LDAModel] - private val distributedMoel = lda.isDistributed match { + private val distributedModel = lda.isDistributed match { case true => pipeline.stages.last.asInstanceOf[DistributedLDAModel] case _ => null } @@ -82,13 +82,13 @@ private[r] class LDAWrapper private ( lazy val vocabSize: Int = lda.vocabSize lazy val docConcentration: Array[Double] = lda.getEffectiveDocConcentration lazy val topicConcentration: Double = lda.getEffectiveTopicConcentration - lazy val trainingLogLikelihood: Double = distributedMoel match { + lazy val trainingLogLikelihood: Double = distributedModel match { case null => Double.NaN - case _ => distributedMoel.trainingLogLikelihood + case _ => distributedModel.trainingLogLikelihood } - lazy val logPrior: Double = distributedMoel match { + lazy val logPrior: Double = distributedModel match { case null => Double.NaN - case _ => distributedMoel.logPrior + case _ => distributedModel.logPrior } override def write: MLWriter = new LDAWrapper.LDAWrapperWriter(this) From 0134a2693f6abfc51d0c11d693b97971072affaa Mon Sep 17 00:00:00 2001 From: "wm624@hotmail.com" Date: Thu, 12 Jan 2017 23:28:42 -0800 Subject: [PATCH 5/9] address review comments --- R/pkg/R/mllib_clustering.R | 12 ++++++++++-- R/pkg/inst/tests/testthat/test_mllib_clustering.R | 8 ++++---- 
From 0134a2693f6abfc51d0c11d693b97971072affaa Mon Sep 17 00:00:00 2001
From: "wm624@hotmail.com" <wm624@hotmail.com>
Date: Thu, 12 Jan 2017 23:28:42 -0800
Subject: [PATCH 5/9] address review comments

---
 R/pkg/R/mllib_clustering.R                        | 12 ++++++++++--
 R/pkg/inst/tests/testthat/test_mllib_clustering.R |  8 ++++----
 .../scala/org/apache/spark/ml/r/LDAWrapper.scala  | 13 +++++--------
 3 files changed, 19 insertions(+), 14 deletions(-)

diff --git a/R/pkg/R/mllib_clustering.R b/R/pkg/R/mllib_clustering.R
index 171d7157d70a..1c8f72185fc6 100644
--- a/R/pkg/R/mllib_clustering.R
+++ b/R/pkg/R/mllib_clustering.R
@@ -411,8 +411,16 @@ setMethod("summary", signature(object = "LDAModel"),
             vocabSize <- callJMethod(jobj, "vocabSize")
             topics <- dataFrame(callJMethod(jobj, "topics", maxTermsPerTopic))
             vocabulary <- callJMethod(jobj, "vocabulary")
-            trainingLogLikelihood <- callJMethod(jobj, "trainingLogLikelihood")
-            logPrior <- callJMethod(jobj, "logPrior")
+            trainingLogLikelihood <- if (isDistributed) {
+              callJMethod(jobj, "trainingLogLikelihood")
+            } else {
+              NULL
+            }
+            logPrior <- if (isDistributed) {
+              callJMethod(jobj, "logPrior")
+            } else {
+              NULL
+            }
             list(docConcentration = unlist(docConcentration),
                  topicConcentration = topicConcentration,
                  logLikelihood = logLikelihood, logPerplexity = logPerplexity,
diff --git a/R/pkg/inst/tests/testthat/test_mllib_clustering.R b/R/pkg/inst/tests/testthat/test_mllib_clustering.R
index 530e817fdb22..174e3cb48a14 100644
--- a/R/pkg/inst/tests/testthat/test_mllib_clustering.R
+++ b/R/pkg/inst/tests/testthat/test_mllib_clustering.R
@@ -196,8 +196,8 @@ test_that("spark.lda with text input", {
   expect_true(logPerplexity >= 0 & is.finite(logPerplexity))
   expect_equal(vocabSize, 10)
   expect_true(setequal(stats$vocabulary, c("0", "1", "2", "3", "4", "5", "6", "7", "8", "9")))
-  expect_true(is.nan(trainingLogLikelihood))
-  expect_true(is.nan(logPrior))
+  expect_true(is.null(trainingLogLikelihood))
+  expect_true(is.null(logPrior))
 
   # Test model save/load
   modelPath <- tempfile(pattern = "spark-lda-text", fileext = ".tmp")
@@ -212,8 +212,8 @@ test_that("spark.lda with text input", {
   expect_equal(logPerplexity, stats2$logPerplexity)
   expect_equal(vocabSize, stats2$vocabSize)
   expect_true(all.equal(vocabulary, stats2$vocabulary))
-  expect_equal(trainingLogLikelihood, stats2$trainingLogLikelihood)
-  expect_equal(logPrior, stats2$logPrior)
+  expect_true(is.null(stats2$trainingLogLikelihood))
+  expect_true(is.null(stats2$logPrior))
 
   unlink(modelPath)
 })
diff --git a/mllib/src/main/scala/org/apache/spark/ml/r/LDAWrapper.scala b/mllib/src/main/scala/org/apache/spark/ml/r/LDAWrapper.scala
index 4b254ad2b280..555f6948c862 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/r/LDAWrapper.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/r/LDAWrapper.scala
@@ -82,14 +82,11 @@ private[r] class LDAWrapper private (
   lazy val vocabSize: Int = lda.vocabSize
   lazy val docConcentration: Array[Double] = lda.getEffectiveDocConcentration
   lazy val topicConcentration: Double = lda.getEffectiveTopicConcentration
-  lazy val trainingLogLikelihood: Double = distributedModel match {
-    case null => Double.NaN
-    case _ => distributedModel.trainingLogLikelihood
-  }
-  lazy val logPrior: Double = distributedModel match {
-    case null => Double.NaN
-    case _ => distributedModel.logPrior
-  }
+  // Only applicable to distributed lda model
+  lazy val trainingLogLikelihood: Double = distributedModel.trainingLogLikelihood
+
+  // Only applicable to distributed lda model
+  lazy val logPrior: Double = distributedModel.logPrior
 
   override def write: MLWriter = new LDAWrapper.LDAWrapperWriter(this)
 }
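The commit above moves the isDistributed guard to the R side and leaves plain definitions in Scala. That works because a lazy val defers its initializer — including an unsafe cast — until the value is first accessed. A self-contained sketch of that mechanic, using hypothetical stand-ins for Spark's model hierarchy rather than the real classes:

    // Hypothetical stand-ins for LDAModel / DistributedLDAModel.
    sealed trait Model { def isDistributed: Boolean }
    class LocalModel extends Model { val isDistributed = false }
    class DistModel extends Model {
      val isDistributed = true
      def trainingLogLikelihood: Double = -123.4 // dummy value
    }

    class Wrapper(model: Model) {
      // The cast runs only when the lazy val is first forced.
      private lazy val distModel = model.asInstanceOf[DistModel]
      lazy val trainingLogLikelihood: Double = distModel.trainingLogLikelihood
    }

    val w = new Wrapper(new LocalModel) // fine: nothing is forced yet
    // w.trainingLogLikelihood         // would throw ClassCastException here,
                                       // which is why R checks isDistributed first

So constructing the wrapper around a local model is always safe; only a caller that skips the isDistributed check can trigger the failed cast.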
From b72592ce02e9a8af518a103ab81a2dfe8a103d51 Mon Sep 17 00:00:00 2001
From: "wm624@hotmail.com" <wm624@hotmail.com>
Date: Thu, 12 Jan 2017 23:36:36 -0800
Subject: [PATCH 6/9] simplify backend code

---
 R/pkg/R/mllib_clustering.R                       |  4 ++--
 .../scala/org/apache/spark/ml/r/LDAWrapper.scala | 15 ++++++---------
 2 files changed, 8 insertions(+), 11 deletions(-)

diff --git a/R/pkg/R/mllib_clustering.R b/R/pkg/R/mllib_clustering.R
index 1c8f72185fc6..09001612f598 100644
--- a/R/pkg/R/mllib_clustering.R
+++ b/R/pkg/R/mllib_clustering.R
@@ -391,10 +391,10 @@ setMethod("spark.lda", signature(data = "SparkDataFrame"),
 #'         \item{\code{trainingLogLikelihood}}{Log likelihood of the observed tokens in the training set,
 #'         given the current parameter estimates:
 #'         log P(docs | topics, topic distributions for docs, Dirichlet hyperparameters)
-#'         It is only for \code{DistributedLDAModel} (i.e., optimizer = "em")}
+#'         It is only for distributed LDA model (i.e., optimizer = "em")}
 #'         \item{\code{logPrior}}{Log probability of the current parameter estimate:
 #'         log P(topics, topic distributions for docs | Dirichlet hyperparameters)
-#'         It is only for \code{DistributedLDAModel} (i.e., optimizer = "em")}
+#'         It is only for distributed LDA model (i.e., optimizer = "em")}
 #' @rdname spark.lda
 #' @aliases summary,LDAModel-method
 #' @export
diff --git a/mllib/src/main/scala/org/apache/spark/ml/r/LDAWrapper.scala b/mllib/src/main/scala/org/apache/spark/ml/r/LDAWrapper.scala
index 555f6948c862..cae7ddcc5343 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/r/LDAWrapper.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/r/LDAWrapper.scala
@@ -45,10 +45,12 @@ private[r] class LDAWrapper private (
   import LDAWrapper._
 
   private val lda: LDAModel = pipeline.stages.last.asInstanceOf[LDAModel]
-  private val distributedModel = lda.isDistributed match {
-    case true => pipeline.stages.last.asInstanceOf[DistributedLDAModel]
-    case _ => null
-  }
+
+  // Only applicable to distributed lda model
+  lazy private val distributedModel =
+    pipeline.stages.last.asInstanceOf[DistributedLDAModel]
+  lazy val trainingLogLikelihood: Double = distributedModel.trainingLogLikelihood
+  lazy val logPrior: Double = distributedModel.logPrior
 
   private val preprocessor: PipelineModel =
     new PipelineModel(s"${Identifiable.randomUID(pipeline.uid)}", pipeline.stages.dropRight(1))
@@ -82,11 +84,6 @@ private[r] class LDAWrapper private (
   lazy val vocabSize: Int = lda.vocabSize
   lazy val docConcentration: Array[Double] = lda.getEffectiveDocConcentration
   lazy val topicConcentration: Double = lda.getEffectiveTopicConcentration
-  // Only applicable to distributed lda model
-  lazy val trainingLogLikelihood: Double = distributedModel.trainingLogLikelihood
-
-  // Only applicable to distributed lda model
-  lazy val logPrior: Double = distributedModel.logPrior
 
   override def write: MLWriter = new LDAWrapper.LDAWrapperWriter(this)
 }
From 882c70da32756e7603bd293b2ba010a585fdc0c5 Mon Sep 17 00:00:00 2001
From: "wm624@hotmail.com" <wm624@hotmail.com>
Date: Thu, 12 Jan 2017 23:39:59 -0800
Subject: [PATCH 7/9] improve comment

---
 mllib/src/main/scala/org/apache/spark/ml/r/LDAWrapper.scala | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mllib/src/main/scala/org/apache/spark/ml/r/LDAWrapper.scala b/mllib/src/main/scala/org/apache/spark/ml/r/LDAWrapper.scala
index cae7ddcc5343..e096bf1f29f3 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/r/LDAWrapper.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/r/LDAWrapper.scala
@@ -46,7 +46,7 @@ private[r] class LDAWrapper private (
 
   private val lda: LDAModel = pipeline.stages.last.asInstanceOf[LDAModel]
 
-  // Only applicable to distributed lda model
+  // The following variables were called by R side code only when the LDA model is distributed
   lazy private val distributedModel =
     pipeline.stages.last.asInstanceOf[DistributedLDAModel]
   lazy val trainingLogLikelihood: Double = distributedModel.trainingLogLikelihood

From 95a69106ca52844bafdf820b50ed8353d6c80a25 Mon Sep 17 00:00:00 2001
From: "wm624@hotmail.com" <wm624@hotmail.com>
Date: Sat, 14 Jan 2017 13:55:27 -0800
Subject: [PATCH 8/9] change NULL to NA

---
 R/pkg/R/mllib_clustering.R                        | 4 ++--
 R/pkg/inst/tests/testthat/test_mllib_clustering.R | 8 ++++----
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/R/pkg/R/mllib_clustering.R b/R/pkg/R/mllib_clustering.R
index 09001612f598..c63cb4da8166 100644
--- a/R/pkg/R/mllib_clustering.R
+++ b/R/pkg/R/mllib_clustering.R
@@ -414,12 +414,12 @@ setMethod("summary", signature(object = "LDAModel"),
             trainingLogLikelihood <- if (isDistributed) {
               callJMethod(jobj, "trainingLogLikelihood")
             } else {
-              NULL
+              NA
             }
             logPrior <- if (isDistributed) {
               callJMethod(jobj, "logPrior")
             } else {
-              NULL
+              NA
             }
             list(docConcentration = unlist(docConcentration),
                  topicConcentration = topicConcentration,
diff --git a/R/pkg/inst/tests/testthat/test_mllib_clustering.R b/R/pkg/inst/tests/testthat/test_mllib_clustering.R
index 174e3cb48a14..328a0ade5187 100644
--- a/R/pkg/inst/tests/testthat/test_mllib_clustering.R
+++ b/R/pkg/inst/tests/testthat/test_mllib_clustering.R
@@ -196,8 +196,8 @@ test_that("spark.lda with text input", {
   expect_true(logPerplexity >= 0 & is.finite(logPerplexity))
   expect_equal(vocabSize, 10)
   expect_true(setequal(stats$vocabulary, c("0", "1", "2", "3", "4", "5", "6", "7", "8", "9")))
-  expect_true(is.null(trainingLogLikelihood))
-  expect_true(is.null(logPrior))
+  expect_true(is.na(trainingLogLikelihood))
+  expect_true(is.na(logPrior))
 
   # Test model save/load
   modelPath <- tempfile(pattern = "spark-lda-text", fileext = ".tmp")
@@ -212,8 +212,8 @@ test_that("spark.lda with text input", {
   expect_equal(logPerplexity, stats2$logPerplexity)
   expect_equal(vocabSize, stats2$vocabSize)
   expect_true(all.equal(vocabulary, stats2$vocabulary))
-  expect_true(is.null(stats2$trainingLogLikelihood))
-  expect_true(is.null(stats2$logPrior))
+  expect_true(is.na(stats2$trainingLogLikelihood))
+  expect_true(is.na(stats2$logPrior))
 
   unlink(modelPath)
 })

From e133ee64961beaf10ccccb7885ece76ded021ae5 Mon Sep 17 00:00:00 2001
From: "wm624@hotmail.com" <wm624@hotmail.com>
Date: Sun, 15 Jan 2017 21:00:17 -0800
Subject: [PATCH 9/9] address review comments

---
 R/pkg/inst/tests/testthat/test_mllib_clustering.R | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/R/pkg/inst/tests/testthat/test_mllib_clustering.R b/R/pkg/inst/tests/testthat/test_mllib_clustering.R
index 328a0ade5187..b3635c4484b1 100644
--- a/R/pkg/inst/tests/testthat/test_mllib_clustering.R
+++ b/R/pkg/inst/tests/testthat/test_mllib_clustering.R
@@ -154,8 +154,8 @@ test_that("spark.lda with libsvm", {
   expect_true(logPerplexity >= 0 & is.finite(logPerplexity))
   expect_equal(vocabSize, 11)
   expect_true(is.null(vocabulary))
-  expect_true(trainingLogLikelihood <= 0 & !is.nan(trainingLogLikelihood))
-  expect_true(logPrior <= 0 & !is.nan(logPrior))
+  expect_true(trainingLogLikelihood <= 0 & !is.na(trainingLogLikelihood))
+  expect_true(logPrior <= 0 & !is.na(logPrior))
 
   # Test model save/load
   modelPath <- tempfile(pattern = "spark-lda", fileext = ".tmp")
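After the final two commits the contract is: with optimizer = "em" the summary carries finite, non-positive trainingLogLikelihood and logPrior values; with any other optimizer both come back as NA, which is R's idiom for a missing value of a field that conceptually exists (and is presumably why review preferred it over NULL). Since R's is.na() is also TRUE for NaN, switching the libsvm assertions from !is.nan() to !is.na() strictly strengthens them. A hedged Scala sketch of the equivalent caller-side logic — the helper name is ours, the Spark API calls are real:

    import org.apache.spark.ml.clustering.{DistributedLDAModel, LDAModel}

    // Mirrors what the R summary() now does through callJMethod: consult
    // isDistributed first and fetch the metrics only when they exist;
    // None plays the role of R's NA.
    def distributedMetrics(model: LDAModel): Option[(Double, Double)] =
      if (model.isDistributed) {
        val dm = model.asInstanceOf[DistributedLDAModel]
        Some((dm.trainingLogLikelihood, dm.logPrior))
      } else {
        None
      }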