Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -140,7 +140,7 @@ class RandomForestClassifier @Since("1.4.0") (
.map(_.asInstanceOf[DecisionTreeClassificationModel])

val numFeatures = oldDataset.first().features.size
val m = new RandomForestClassificationModel(trees, numFeatures, numClasses)
val m = new RandomForestClassificationModel(uid, trees, numFeatures, numClasses)
instr.logSuccess(m)
m
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -131,7 +131,7 @@ class RandomForestRegressor @Since("1.4.0") (@Since("1.4.0") override val uid: S
.map(_.asInstanceOf[DecisionTreeRegressionModel])

val numFeatures = oldDataset.first().features.size
val m = new RandomForestRegressionModel(trees, numFeatures)
val m = new RandomForestRegressionModel(uid, trees, numFeatures)
instr.logSuccess(m)
m
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,7 @@ class PipelineSuite extends SparkFunSuite with MLlibTestSparkContext with Defaul
.setStages(Array(estimator0, transformer1, estimator2, transformer3))
val pipelineModel = pipeline.fit(dataset0)

MLTestingUtils.checkCopy(pipelineModel)
MLTestingUtils.checkCopyAndUids(pipeline, pipelineModel)

assert(pipelineModel.stages.length === 4)
assert(pipelineModel.stages(0).eq(model0))
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -249,8 +249,7 @@ class DecisionTreeClassifierSuite
val newData: DataFrame = TreeTests.setMetadata(rdd, categoricalFeatures, numClasses)
val newTree = dt.fit(newData)

// copied model must have the same parent.
MLTestingUtils.checkCopy(newTree)
MLTestingUtils.checkCopyAndUids(dt, newTree)

val predictions = newTree.transform(newData)
.select(newTree.getPredictionCol, newTree.getRawPredictionCol, newTree.getProbabilityCol)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -97,8 +97,7 @@ class GBTClassifierSuite extends SparkFunSuite with MLlibTestSparkContext
assert(model.getProbabilityCol === "probability")
assert(model.hasParent)

// copied model must have the same parent.
MLTestingUtils.checkCopy(model)
MLTestingUtils.checkCopyAndUids(gbt, model)
}

test("setThreshold, getThreshold") {
Expand Down Expand Up @@ -261,8 +260,7 @@ class GBTClassifierSuite extends SparkFunSuite with MLlibTestSparkContext
.setSeed(123)
val model = gbt.fit(df)

// copied model must have the same parent.
MLTestingUtils.checkCopy(model)
MLTestingUtils.checkCopyAndUids(gbt, model)

sc.checkpointDir = None
Utils.deleteRecursively(tempDir)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -124,8 +124,7 @@ class LinearSVCSuite extends SparkFunSuite with MLlibTestSparkContext with Defau
assert(model.hasParent)
assert(model.numFeatures === 2)

// copied model must have the same parent.
MLTestingUtils.checkCopy(model)
MLTestingUtils.checkCopyAndUids(lsvc, model)
}

test("linear svc doesn't fit intercept when fitIntercept is off") {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -142,8 +142,7 @@ class LogisticRegressionSuite
assert(model.intercept !== 0.0)
assert(model.hasParent)

// copied model must have the same parent.
MLTestingUtils.checkCopy(model)
MLTestingUtils.checkCopyAndUids(lr, model)
assert(model.hasSummary)
val copiedModel = model.copy(ParamMap.empty)
assert(copiedModel.hasSummary)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -74,8 +74,8 @@ class MultilayerPerceptronClassifierSuite
.setMaxIter(100)
.setSolver("l-bfgs")
val model = trainer.fit(dataset)
MLTestingUtils.checkCopy(model)
val result = model.transform(dataset)
MLTestingUtils.checkCopyAndUids(trainer, model)
val predictionAndLabels = result.select("prediction", "label").collect()
predictionAndLabels.foreach { case Row(p: Double, l: Double) =>
assert(p == l)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -149,6 +149,7 @@ class NaiveBayesSuite extends SparkFunSuite with MLlibTestSparkContext with Defa

validateModelFit(pi, theta, model)
assert(model.hasParent)
MLTestingUtils.checkCopyAndUids(nb, model)

val validationDataset =
generateNaiveBayesInput(piArray, thetaArray, nPoints, 17, "multinomial").toDF()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -76,8 +76,7 @@ class OneVsRestSuite extends SparkFunSuite with MLlibTestSparkContext with Defau
assert(ova.getPredictionCol === "prediction")
val ovaModel = ova.fit(dataset)

// copied model must have the same parent.
MLTestingUtils.checkCopy(ovaModel)
MLTestingUtils.checkCopyAndUids(ova, ovaModel)

assert(ovaModel.models.length === numClasses)
val transformedDataset = ovaModel.transform(dataset)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -141,8 +141,7 @@ class RandomForestClassifierSuite
val df: DataFrame = TreeTests.setMetadata(rdd, categoricalFeatures, numClasses)
val model = rf.fit(df)

// copied model must have the same parent.
MLTestingUtils.checkCopy(model)
MLTestingUtils.checkCopyAndUids(rf, model)

val predictions = model.transform(df)
.select(rf.getPredictionCol, rf.getRawPredictionCol, rf.getProbabilityCol)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -47,8 +47,7 @@ class BisectingKMeansSuite
assert(bkm.getMinDivisibleClusterSize === 1.0)
val model = bkm.setMaxIter(1).fit(dataset)

// copied model must have the same parent
MLTestingUtils.checkCopy(model)
MLTestingUtils.checkCopyAndUids(bkm, model)
assert(model.hasSummary)
val copiedModel = model.copy(ParamMap.empty)
assert(copiedModel.hasSummary)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -77,8 +77,7 @@ class GaussianMixtureSuite extends SparkFunSuite with MLlibTestSparkContext
assert(gm.getTol === 0.01)
val model = gm.setMaxIter(1).fit(dataset)

// copied model must have the same parent
MLTestingUtils.checkCopy(model)
MLTestingUtils.checkCopyAndUids(gm, model)
assert(model.hasSummary)
val copiedModel = model.copy(ParamMap.empty)
assert(copiedModel.hasSummary)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -52,8 +52,7 @@ class KMeansSuite extends SparkFunSuite with MLlibTestSparkContext with DefaultR
assert(kmeans.getTol === 1e-4)
val model = kmeans.setMaxIter(1).fit(dataset)

// copied model must have the same parent
MLTestingUtils.checkCopy(model)
MLTestingUtils.checkCopyAndUids(kmeans, model)
assert(model.hasSummary)
val copiedModel = model.copy(ParamMap.empty)
assert(copiedModel.hasSummary)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -176,7 +176,7 @@ class LDASuite extends SparkFunSuite with MLlibTestSparkContext with DefaultRead
val lda = new LDA().setK(k).setSeed(1).setOptimizer("online").setMaxIter(2)
val model = lda.fit(dataset)

MLTestingUtils.checkCopy(model)
MLTestingUtils.checkCopyAndUids(lda, model)

assert(model.isInstanceOf[LocalLDAModel])
assert(model.vocabSize === vocabSize)
Expand Down Expand Up @@ -221,7 +221,7 @@ class LDASuite extends SparkFunSuite with MLlibTestSparkContext with DefaultRead
val lda = new LDA().setK(k).setSeed(1).setOptimizer("em").setMaxIter(2)
val model_ = lda.fit(dataset)

MLTestingUtils.checkCopy(model_)
MLTestingUtils.checkCopyAndUids(lda, model_)

assert(model_.isInstanceOf[DistributedLDAModel])
val model = model_.asInstanceOf[DistributedLDAModel]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,8 @@ class BucketedRandomProjectionLSHSuite
unitVectors.foreach { v: Vector =>
assert(Vectors.norm(v, 2.0) ~== 1.0 absTol 1e-14)
}
MLTestingUtils.checkCopy(brpModel)

MLTestingUtils.checkCopyAndUids(brp, brpModel)
}

test("BucketedRandomProjectionLSH: test of LSH property") {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -119,7 +119,8 @@ class ChiSqSelectorSuite extends SparkFunSuite with MLlibTestSparkContext
test("Test Chi-Square selector: numTopFeatures") {
val selector = new ChiSqSelector()
.setOutputCol("filtered").setSelectorType("numTopFeatures").setNumTopFeatures(1)
ChiSqSelectorSuite.testSelector(selector, dataset)
val model = ChiSqSelectorSuite.testSelector(selector, dataset)
MLTestingUtils.checkCopyAndUids(selector, model)
}

test("Test Chi-Square selector: percentile") {
Expand Down Expand Up @@ -166,11 +167,13 @@ class ChiSqSelectorSuite extends SparkFunSuite with MLlibTestSparkContext

object ChiSqSelectorSuite {

private def testSelector(selector: ChiSqSelector, dataset: Dataset[_]): Unit = {
selector.fit(dataset).transform(dataset).select("filtered", "topFeature").collect()
private def testSelector(selector: ChiSqSelector, dataset: Dataset[_]): ChiSqSelectorModel = {
val selectorModel = selector.fit(dataset)
selectorModel.transform(dataset).select("filtered", "topFeature").collect()
.foreach { case Row(vec1: Vector, vec2: Vector) =>
assert(vec1 ~== vec2 absTol 1e-1)
}
selectorModel
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ package org.apache.spark.ml.feature
import org.apache.spark.SparkFunSuite
import org.apache.spark.ml.linalg.{Vector, Vectors}
import org.apache.spark.ml.param.ParamsSuite
import org.apache.spark.ml.util.DefaultReadWriteTest
import org.apache.spark.ml.util.{DefaultReadWriteTest, MLTestingUtils}
import org.apache.spark.ml.util.TestingUtils._
import org.apache.spark.mllib.util.MLlibTestSparkContext
import org.apache.spark.sql.Row
Expand Down Expand Up @@ -68,10 +68,11 @@ class CountVectorizerSuite extends SparkFunSuite with MLlibTestSparkContext
val cv = new CountVectorizer()
.setInputCol("words")
.setOutputCol("features")
.fit(df)
assert(cv.vocabulary.toSet === Set("a", "b", "c", "d", "e"))
val cvm = cv.fit(df)
MLTestingUtils.checkCopyAndUids(cv, cvm)
assert(cvm.vocabulary.toSet === Set("a", "b", "c", "d", "e"))

cv.transform(df).select("features", "expected").collect().foreach {
cvm.transform(df).select("features", "expected").collect().foreach {
case Row(features: Vector, expected: Vector) =>
assert(features ~== expected absTol 1e-14)
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ package org.apache.spark.ml.feature
import org.apache.spark.SparkFunSuite
import org.apache.spark.ml.linalg.{DenseVector, SparseVector, Vector, Vectors}
import org.apache.spark.ml.param.ParamsSuite
import org.apache.spark.ml.util.DefaultReadWriteTest
import org.apache.spark.ml.util.{DefaultReadWriteTest, MLTestingUtils}
import org.apache.spark.ml.util.TestingUtils._
import org.apache.spark.mllib.feature.{IDFModel => OldIDFModel}
import org.apache.spark.mllib.linalg.VectorImplicits._
Expand Down Expand Up @@ -65,10 +65,12 @@ class IDFSuite extends SparkFunSuite with MLlibTestSparkContext with DefaultRead

val df = data.zip(expected).toSeq.toDF("features", "expected")

val idfModel = new IDF()
val idfEst = new IDF()
.setInputCol("features")
.setOutputCol("idfValue")
.fit(df)
val idfModel = idfEst.fit(df)

MLTestingUtils.checkCopyAndUids(idfEst, idfModel)

idfModel.transform(df).select("idfValue", "expected").collect().foreach {
case Row(x: Vector, y: Vector) =>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
package org.apache.spark.ml.feature

import org.apache.spark.ml.linalg.{Vector, VectorUDT}
import org.apache.spark.ml.util.SchemaUtils
import org.apache.spark.ml.util.{MLTestingUtils, SchemaUtils}
import org.apache.spark.sql.Dataset
import org.apache.spark.sql.functions._
import org.apache.spark.sql.types.DataTypes
Expand Down Expand Up @@ -58,6 +58,8 @@ private[ml] object LSHTest {
val outputCol = model.getOutputCol
val transformedData = model.transform(dataset)

MLTestingUtils.checkCopyAndUids(lsh, model)

// Check output column type
SchemaUtils.checkColumnType(
transformedData.schema, model.getOutputCol, DataTypes.createArrayType(new VectorUDT))
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -50,8 +50,7 @@ class MaxAbsScalerSuite extends SparkFunSuite with MLlibTestSparkContext with De
assert(vector1.equals(vector2), s"MaxAbsScaler ut error: $vector2 should be $vector1")
}

// copied model must have the same parent.
MLTestingUtils.checkCopy(model)
MLTestingUtils.checkCopyAndUids(scaler, model)
}

test("MaxAbsScaler read/write") {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ class MinHashLSHSuite extends SparkFunSuite with MLlibTestSparkContext with Defa
.setOutputCol("values")
val model = mh.fit(dataset)
assert(mh.uid === model.uid)
MLTestingUtils.checkCopy(model)
MLTestingUtils.checkCopyAndUids(mh, model)
}

test("hashFunction") {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -53,8 +53,7 @@ class MinMaxScalerSuite extends SparkFunSuite with MLlibTestSparkContext with De
assert(vector1.equals(vector2), "Transformed vector is different with expected.")
}

// copied model must have the same parent.
MLTestingUtils.checkCopy(model)
MLTestingUtils.checkCopyAndUids(scaler, model)
}

test("MinMaxScaler arguments max must be larger than min") {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -58,12 +58,12 @@ class PCASuite extends SparkFunSuite with MLlibTestSparkContext with DefaultRead
.setInputCol("features")
.setOutputCol("pca_features")
.setK(3)
.fit(df)

// copied model must have the same parent.
MLTestingUtils.checkCopy(pca)
val pcaModel = pca.fit(df)

pca.transform(df).select("pca_features", "expected").collect().foreach {
MLTestingUtils.checkCopyAndUids(pca, pcaModel)

pcaModel.transform(df).select("pca_features", "expected").collect().foreach {
case Row(x: Vector, y: Vector) =>
assert(x ~== y absTol 1e-5, "Transformed vector is different with expected vector.")
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ class RFormulaSuite extends SparkFunSuite with MLlibTestSparkContext with Defaul
val formula = new RFormula().setFormula("id ~ v1 + v2")
val original = Seq((0, 1.0, 3.0), (2, 2.0, 5.0)).toDF("id", "v1", "v2")
val model = formula.fit(original)
MLTestingUtils.checkCopy(model)
MLTestingUtils.checkCopyAndUids(formula, model)
val result = model.transform(original)
val resultSchema = model.transformSchema(original.schema)
val expected = Seq(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ package org.apache.spark.ml.feature
import org.apache.spark.SparkFunSuite
import org.apache.spark.ml.linalg.{Vector, Vectors}
import org.apache.spark.ml.param.ParamsSuite
import org.apache.spark.ml.util.DefaultReadWriteTest
import org.apache.spark.ml.util.{DefaultReadWriteTest, MLTestingUtils}
import org.apache.spark.ml.util.TestingUtils._
import org.apache.spark.mllib.util.MLlibTestSparkContext
import org.apache.spark.sql.{DataFrame, Row}
Expand Down Expand Up @@ -77,10 +77,11 @@ class StandardScalerSuite extends SparkFunSuite with MLlibTestSparkContext
test("Standardization with default parameter") {
val df0 = data.zip(resWithStd).toSeq.toDF("features", "expected")

val standardScaler0 = new StandardScaler()
val standardScalerEst0 = new StandardScaler()
.setInputCol("features")
.setOutputCol("standardized_features")
.fit(df0)
val standardScaler0 = standardScalerEst0.fit(df0)
MLTestingUtils.checkCopyAndUids(standardScalerEst0, standardScaler0)

assertResult(standardScaler0.transform(df0))
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -45,12 +45,11 @@ class StringIndexerSuite
val indexer = new StringIndexer()
.setInputCol("label")
.setOutputCol("labelIndex")
.fit(df)
val indexerModel = indexer.fit(df)

// copied model must have the same parent.
MLTestingUtils.checkCopy(indexer)
MLTestingUtils.checkCopyAndUids(indexer, indexerModel)

val transformed = indexer.transform(df)
val transformed = indexerModel.transform(df)
val attr = Attribute.fromStructField(transformed.schema("labelIndex"))
.asInstanceOf[NominalAttribute]
assert(attr.values.get === Array("a", "c", "b"))
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -114,8 +114,7 @@ class VectorIndexerSuite extends SparkFunSuite with MLlibTestSparkContext
val vectorIndexer = getIndexer
val model = vectorIndexer.fit(densePoints1) // vectors of length 3

// copied model must have the same parent.
MLTestingUtils.checkCopy(model)
MLTestingUtils.checkCopyAndUids(vectorIndexer, model)

model.transform(densePoints1) // should work
model.transform(sparsePoints1) // should work
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -57,15 +57,14 @@ class Word2VecSuite extends SparkFunSuite with MLlibTestSparkContext with Defaul

val docDF = doc.zip(expected).toDF("text", "expected")

val model = new Word2Vec()
val w2v = new Word2Vec()
.setVectorSize(3)
.setInputCol("text")
.setOutputCol("result")
.setSeed(42L)
.fit(docDF)
val model = w2v.fit(docDF)

// copied model must have the same parent.
MLTestingUtils.checkCopy(model)
MLTestingUtils.checkCopyAndUids(w2v, model)

// These expectations are just magic values, characterizing the current
// behavior. The test needs to be updated to be more general, see SPARK-11502
Expand Down
Loading