diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/Bucketizer.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/Bucketizer.scala
index d1f3b2af1e48..07d44a9dca0c 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/feature/Bucketizer.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/feature/Bucketizer.scala
@@ -78,16 +78,19 @@ final class Bucketizer @Since("1.4.0") (@Since("1.4.0") override val uid: String
   def setOutputCol(value: String): this.type = set(outputCol, value)
 
   /**
-   * Param for how to handle invalid entries. Options are 'skip' (filter out rows with
-   * invalid values), 'error' (throw an error), or 'keep' (keep invalid values in a special
-   * additional bucket).
+   * Param for how to handle invalid entries containing NaN values.
+   * Values outside the splits will always be treated as errors.
+   * Options are 'skip' (filter out rows with invalid values), 'error' (throw an error),
+   * or 'keep' (keep invalid values in a special additional bucket).
    * Default: "error"
    * @group param
    */
   // TODO: SPARK-18619 Make Bucketizer inherit from HasHandleInvalid.
   @Since("2.1.0")
-  val handleInvalid: Param[String] = new Param[String](this, "handleInvalid", "how to handle " +
-    "invalid entries. Options are skip (filter out rows with invalid values), " +
+  val handleInvalid: Param[String] = new Param[String](this, "handleInvalid",
+    "how to handle invalid entries containing NaN values. " +
+    "Values outside the splits will always be treated as errors. " +
+    "Options are skip (filter out rows with invalid values), " +
     "error (throw an error), or keep (keep invalid values in a special additional bucket).",
     ParamValidators.inArray(Bucketizer.supportedHandleInvalids))
 
diff --git a/python/pyspark/ml/feature.py b/python/pyspark/ml/feature.py
index 92f8549e9cb9..5aaeb7c4decf 100755
--- a/python/pyspark/ml/feature.py
+++ b/python/pyspark/ml/feature.py
@@ -356,7 +356,9 @@ class Bucketizer(JavaTransformer, HasInputCol, HasOutputCol, JavaMLReadable, Jav
               "splits specified will be treated as errors.",
               typeConverter=TypeConverters.toListFloat)
 
-    handleInvalid = Param(Params._dummy(), "handleInvalid", "how to handle invalid entries. " +
+    handleInvalid = Param(Params._dummy(), "handleInvalid",
+                          "how to handle invalid entries containing NaN values. " +
+                          "Values outside the splits will always be treated as errors. " +
                           "Options are 'skip' (filter out rows with invalid values), " +
                           "'error' (throw an error), or 'keep' (keep invalid values in a special " +
                           "additional bucket).",