28 changes: 28 additions & 0 deletions mllib/src/main/scala/org/apache/spark/ml/clustering/LDA.scala
@@ -18,7 +18,10 @@
package org.apache.spark.ml.clustering

import org.apache.hadoop.fs.Path
import org.json4s.DefaultFormats
import org.json4s.jackson.JsonMethods._

import org.apache.spark.SparkException
import org.apache.spark.annotation.{Experimental, Since}
import org.apache.spark.internal.Logging
import org.apache.spark.ml.{Estimator, Model}
@@ -457,6 +460,31 @@ sealed abstract class LDAModel private[ml] (
  def describeTopics(): DataFrame = describeTopics(10)
}

object LDAModel extends MLReadable[LDAModel] {

  private class LDAModelReader extends MLReader[LDAModel] {
    override def load(path: String): LDAModel = {
      val metadataPath = new Path(path, "metadata").toString
      val metadata = parse(sc.textFile(metadataPath, 1).first())
      implicit val format = DefaultFormats
      val className = (metadata \ "class").extract[String]
      className match {
        case c if className == classOf[LocalLDAModel].getName =>
          LocalLDAModel.load(path)
        case c if className == classOf[DistributedLDAModel].getName =>
          DistributedLDAModel.load(path)
        case _ => throw new SparkException(s"$className in $path is not a LDAModel")
      }
    }
  }

  @Since("2.0.0")
  override def read: MLReader[LDAModel] = new LDAModelReader

  @Since("2.0.0")
  override def load(path: String): LDAModel = super.load(path)
}


/**
* :: Experimental ::
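Note on the new reader: the saved metadata JSON records the concrete model class, and the generic LDAModel reader parses that field to delegate to either LocalLDAModel.load or DistributedLDAModel.load. The following is a minimal, standalone sketch (not code from this PR) of that dispatch step; it assumes json4s-jackson on the classpath and uses a made-up, trimmed-down metadata string purely for illustration.

    import org.json4s.DefaultFormats
    import org.json4s.jackson.JsonMethods.parse

    object MetadataDispatchSketch {
      def main(args: Array[String]): Unit = {
        implicit val formats = DefaultFormats

        // Hypothetical metadata line, roughly the shape of what a saved model's
        // metadata file would contain (only the "class" field matters here).
        val metadataJson =
          """{"class":"org.apache.spark.ml.clustering.LocalLDAModel","sparkVersion":"2.0.0"}"""

        // Extract the concrete class name, as LDAModelReader does.
        val className = (parse(metadataJson) \ "class").extract[String]

        // In the real reader this selects LocalLDAModel.load or DistributedLDAModel.load;
        // here we only report which branch would be taken.
        className match {
          case "org.apache.spark.ml.clustering.LocalLDAModel" =>
            println("would call LocalLDAModel.load(path)")
          case "org.apache.spark.ml.clustering.DistributedLDAModel" =>
            println("would call DistributedLDAModel.load(path)")
          case other =>
            println(s"$other is not an LDAModel")
        }
      }
    }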
21 changes: 21 additions & 0 deletions mllib/src/test/scala/org/apache/spark/ml/clustering/LDASuite.scala
@@ -17,12 +17,15 @@

package org.apache.spark.ml.clustering

import java.io.File

import org.apache.spark.SparkFunSuite
import org.apache.spark.ml.util.{DefaultReadWriteTest, MLTestingUtils}
import org.apache.spark.mllib.linalg.{Vector, Vectors}
import org.apache.spark.mllib.util.MLlibTestSparkContext
import org.apache.spark.mllib.util.TestingUtils._
import org.apache.spark.sql.{DataFrame, Row, SQLContext}
import org.apache.spark.util.Utils


object LDASuite {
@@ -261,4 +264,22 @@ class LDASuite extends SparkFunSuite with MLlibTestSparkContext with DefaultRead
    testEstimatorAndModelReadWrite(lda, dataset,
      LDASuite.allParamSettings ++ Map("optimizer" -> "em"), checkModelData)
  }

  test("load LDAModel") {
    val lda = new LDA().setK(k).setSeed(1).setOptimizer("em").setMaxIter(2)
    val distributedModel = lda.fit(dataset)
    val localModel = lda.setOptimizer("online").fit(dataset)

    val tempDir1 = Utils.createTempDir()
    val distributedPath = new File(tempDir1, "distributed").getPath
    val localPath = new File(tempDir1, "local").getPath
    try {
      distributedModel.save(distributedPath)
      localModel.save(localPath)
      assert(LDAModel.load(distributedPath).isInstanceOf[DistributedLDAModel])
      assert(LDAModel.load(localPath).isInstanceOf[LocalLDAModel])
    } finally {
      Utils.deleteRecursively(tempDir1)
    }
  }
}
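For reference, a hedged sketch of the end-user call pattern the test above exercises (the save path is made up, and a running SparkSession is assumed): a caller who does not know which optimizer produced a saved model can load it through the generic companion object and then match on the concrete type.

    import org.apache.spark.ml.clustering.{DistributedLDAModel, LDAModel, LocalLDAModel}

    // Hypothetical path to a model saved earlier with model.save(...).
    val model: LDAModel = LDAModel.load("/tmp/saved-lda-model")

    model match {
      case m: DistributedLDAModel =>
        // Produced by the "em" optimizer.
        println(s"distributed model with k=${m.getK}")
      case m: LocalLDAModel =>
        // Produced by the "online" optimizer.
        println(s"local model with k=${m.getK}")
    }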