Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,7 @@ class StringIndexer @Since("1.4.0") (

@Since("2.0.0")
override def fit(dataset: Dataset[_]): StringIndexerModel = {
transformSchema(dataset.schema, logging = true)
val counts = dataset.select(col($(inputCol)).cast(StringType))
.rdd
.map(_.getString(0))
Expand Down Expand Up @@ -160,7 +161,7 @@ class StringIndexerModel (
"Skip StringIndexerModel.")
return dataset.toDF
}
validateAndTransformSchema(dataset.schema)
transformSchema(dataset.schema, logging = true)

val indexer = udf { label: String =>
if (labelToIndex.contains(label)) {
Expand Down Expand Up @@ -305,6 +306,7 @@ class IndexToString private[ml] (@Since("1.5.0") override val uid: String)

@Since("2.0.0")
override def transform(dataset: Dataset[_]): DataFrame = {
transformSchema(dataset.schema, logging = true)
val inputColSchema = dataset.schema($(inputCol))
// If the labels array is empty use column metadata
val values = if (!isDefined(labels) || $(labels).isEmpty) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ class VectorAssembler @Since("1.4.0") (@Since("1.4.0") override val uid: String)

@Since("2.0.0")
override def transform(dataset: Dataset[_]): DataFrame = {
transformSchema(dataset.schema, logging = true)
// Schema transformation.
val schema = dataset.schema
lazy val first = dataset.toDF.first()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -120,12 +120,20 @@ class StringIndexerSuite

// Checks that an output column name colliding with an existing column is
// rejected with IllegalArgumentException, both when fitting the estimator
// and when transforming with an already-fitted model.
test("StringIndexerModel can't overwrite output column") {
  // The frame already contains a column literally named "output".
  val df = spark.createDataFrame(Seq((1, 2), (3, 4))).toDF("input", "output")

  // Estimator side: fitting with the colliding output column must fail.
  intercept[IllegalArgumentException] {
    new StringIndexer()
      .setInputCol("input")
      .setOutputCol("output")
      .fit(df)
  }

  // Fit with a non-colliding output column so that a model is available.
  val indexer = new StringIndexer()
    .setInputCol("input")
    .setOutputCol("indexedInput")
    .fit(df)

  // Model side: only the call that re-points the output at the existing
  // column lives inside the intercept, so the assertion is unambiguous.
  intercept[IllegalArgumentException] {
    indexer.setOutputCol("output").transform(df)
  }
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -74,10 +74,10 @@ class VectorAssemblerSuite
val assembler = new VectorAssembler()
.setInputCols(Array("a", "b", "c"))
.setOutputCol("features")
val thrown = intercept[SparkException] {
val thrown = intercept[IllegalArgumentException] {
assembler.transform(df)
}
assert(thrown.getMessage contains "VectorAssembler does not support the StringType type")
assert(thrown.getMessage contains "Data type StringType is not supported")
}

test("ML attributes") {
Expand Down