Skip to content
Closed
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -88,9 +88,7 @@ class VectorIndexerSuite extends SparkFunSuite with MLlibTestSparkContext

densePoints1 = densePoints1Seq.map(FeatureData).toDF()
sparsePoints1 = sparsePoints1Seq.map(FeatureData).toDF()
// TODO: If we directly use `toDF` without parallelize, the test in
// "Throws error when given RDDs with different size vectors" fails for an unknown reason.
densePoints2 = sc.parallelize(densePoints2Seq, 2).map(FeatureData).toDF()
densePoints2 = densePoints2Seq.map(FeatureData).toDF()
sparsePoints2 = sparsePoints2Seq.map(FeatureData).toDF()
badPoints = badPointsSeq.map(FeatureData).toDF()
}
Expand Down Expand Up @@ -121,10 +119,17 @@ class VectorIndexerSuite extends SparkFunSuite with MLlibTestSparkContext

model.transform(densePoints1) // should work
model.transform(sparsePoints1) // should work
intercept[SparkException] {
// If the data is local Dataset, it throws AssertionError directly.
intercept[AssertionError] {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe off-topic for this review, but is this an assertion error? bad input shouldn't cause an assertion to trip.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, this is indeed a problem; let's try to find all similar cases and resolve them in a separate PR. Thanks.

model.transform(densePoints2).collect()
logInfo("Did not throw error when fit, transform were called on vectors of different lengths")
}
// If the data is a distributed Dataset, it throws a SparkException
// that wraps the underlying AssertionError.
intercept[SparkException] {
model.transform(densePoints2.repartition(2)).collect()
logInfo("Did not throw error when fit, transform were called on vectors of different lengths")
}
intercept[SparkException] {
vectorIndexer.fit(badPoints)
logInfo("Did not throw error when fitting vectors of different lengths in same RDD.")
Expand Down