mllib/src/main/scala/org/apache/spark/ml/regression/DecisionTreeRegressor.scala
@@ -17,7 +17,7 @@

package org.apache.spark.ml.regression

-import org.apache.spark.annotation.Experimental
+import org.apache.spark.annotation.{Experimental, Since}
import org.apache.spark.ml.{PredictionModel, Predictor}
import org.apache.spark.ml.param.ParamMap
import org.apache.spark.ml.tree.{DecisionTreeModel, DecisionTreeParams, Node, TreeRegressorParams}
@@ -36,30 +36,39 @@ import org.apache.spark.sql.DataFrame
* for regression.
* It supports both continuous and categorical features.
*/
@Since("1.4.0")
@Experimental
-final class DecisionTreeRegressor(override val uid: String)
+final class DecisionTreeRegressor @Since("1.4.0") (@Since("1.4.0") override val uid: String)
extends Predictor[Vector, DecisionTreeRegressor, DecisionTreeRegressionModel]
with DecisionTreeParams with TreeRegressorParams {

@Since("1.4.0")
def this() = this(Identifiable.randomUID("dtr"))

// Override parameter setters from parent trait for Java API compatibility.

@Since("1.4.0")
override def setMaxDepth(value: Int): this.type = super.setMaxDepth(value)

@Since("1.4.0")
Contributor: insert a blank line above the @Since tag. Please do likewise at other tags.
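(A sketch of the requested style, matching the convention used elsewhere in this diff: one blank line separating each member, with the @Since tag directly above the declaration.)

  @Since("1.4.0")
  override def setMaxBins(value: Int): this.type = super.setMaxBins(value)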

override def setMaxBins(value: Int): this.type = super.setMaxBins(value)

@Since("1.4.0")
override def setMinInstancesPerNode(value: Int): this.type =
super.setMinInstancesPerNode(value)

@Since("1.4.0")
override def setMinInfoGain(value: Double): this.type = super.setMinInfoGain(value)

@Since("1.4.0")
override def setMaxMemoryInMB(value: Int): this.type = super.setMaxMemoryInMB(value)

@Since("1.4.0")
override def setCacheNodeIds(value: Boolean): this.type = super.setCacheNodeIds(value)

@Since("1.4.0")
override def setCheckpointInterval(value: Int): this.type = super.setCheckpointInterval(value)

@Since("1.4.0")
override def setImpurity(value: String): this.type = super.setImpurity(value)

override protected def train(dataset: DataFrame): DecisionTreeRegressionModel = {
@@ -78,9 +87,11 @@ final class DecisionTreeRegressor(override val uid: String)
subsamplingRate = 1.0)
}

@Since("1.4.0")
override def copy(extra: ParamMap): DecisionTreeRegressor = defaultCopy(extra)
}

@Since("1.4.0")
@Experimental
object DecisionTreeRegressor {
/** Accessor for supported impurities: variance */
@@ -93,6 +104,7 @@ object DecisionTreeRegressor {
* It supports both continuous and categorical features.
* @param rootNode Root of the decision tree
*/
@Since("1.4.0")
@Experimental
final class DecisionTreeRegressionModel private[ml] (
override val uid: String,
@@ -115,10 +127,12 @@ final class DecisionTreeRegressionModel private[ml] (
rootNode.predictImpl(features).prediction
}

@Since("1.4.0")
override def copy(extra: ParamMap): DecisionTreeRegressionModel = {
copyValues(new DecisionTreeRegressionModel(uid, rootNode, numFeatures), extra).setParent(parent)
}

@Since("1.4.0")
override def toString: String = {
s"DecisionTreeRegressionModel (uid=$uid) of depth $depth with $numNodes nodes"
}
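For context, a minimal usage sketch of the annotated API (assuming a DataFrame named `training` with the default "label" and "features" columns; the variable names are illustrative, not from this patch):

  import org.apache.spark.ml.regression.DecisionTreeRegressor

  // The no-arg constructor and the Java-friendly setters are all tagged @Since("1.4.0").
  val dtr = new DecisionTreeRegressor()
    .setMaxDepth(5)
    .setMaxBins(32)

  val model = dtr.fit(training)  // a DecisionTreeRegressionModel
  println(model)                 // DecisionTreeRegressionModel (uid=...) of depth ... with ... nodes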
mllib/src/main/scala/org/apache/spark/ml/regression/GBTRegressor.scala
@@ -20,7 +20,7 @@ package org.apache.spark.ml.regression
import com.github.fommil.netlib.BLAS.{getInstance => blas}

import org.apache.spark.Logging
-import org.apache.spark.annotation.Experimental
+import org.apache.spark.annotation.{Experimental, Since}
import org.apache.spark.ml.{PredictionModel, Predictor}
import org.apache.spark.ml.param.{Param, ParamMap}
import org.apache.spark.ml.tree.{DecisionTreeModel, GBTParams, TreeEnsembleModel, TreeRegressorParams}
@@ -42,54 +42,65 @@ import org.apache.spark.sql.types.DoubleType
* learning algorithm for regression.
* It supports both continuous and categorical features.
*/
@Since("1.4.0")
@Experimental
-final class GBTRegressor(override val uid: String)
+final class GBTRegressor @Since("1.4.0") (@Since("1.4.0") override val uid: String)
extends Predictor[Vector, GBTRegressor, GBTRegressionModel]
with GBTParams with TreeRegressorParams with Logging {

@Since("1.4.0")
def this() = this(Identifiable.randomUID("gbtr"))

// Override parameter setters from parent trait for Java API compatibility.

// Parameters from TreeRegressorParams:

@Since("1.4.0")
override def setMaxDepth(value: Int): this.type = super.setMaxDepth(value)

@Since("1.4.0")
override def setMaxBins(value: Int): this.type = super.setMaxBins(value)

@Since("1.4.0")
override def setMinInstancesPerNode(value: Int): this.type =
super.setMinInstancesPerNode(value)

@Since("1.4.0")
override def setMinInfoGain(value: Double): this.type = super.setMinInfoGain(value)

@Since("1.4.0")
override def setMaxMemoryInMB(value: Int): this.type = super.setMaxMemoryInMB(value)

@Since("1.4.0")
override def setCacheNodeIds(value: Boolean): this.type = super.setCacheNodeIds(value)

@Since("1.4.0")
override def setCheckpointInterval(value: Int): this.type = super.setCheckpointInterval(value)

/**
* The impurity setting is ignored for GBT models.
* Individual trees are built using impurity "Variance."
*/
@Since("1.4.0")
override def setImpurity(value: String): this.type = {
logWarning("GBTRegressor.setImpurity should NOT be used")
this
}

// Parameters from TreeEnsembleParams:

@Since("1.4.0")
override def setSubsamplingRate(value: Double): this.type = super.setSubsamplingRate(value)

@Since("1.4.0")
override def setSeed(value: Long): this.type = {
logWarning("The 'seed' parameter is currently ignored by Gradient Boosting.")
super.setSeed(value)
}

// Parameters from GBTParams:

@Since("1.4.0")
override def setMaxIter(value: Int): this.type = super.setMaxIter(value)

@Since("1.4.0")
override def setStepSize(value: Double): this.type = super.setStepSize(value)

// Parameters for GBTRegressor:
@@ -100,6 +111,7 @@ final class GBTRegressor(override val uid: String)
* (default = squared)
* @group param
*/
@Since("1.4.0")
val lossType: Param[String] = new Param[String](this, "lossType", "Loss function which GBT" +
" tries to minimize (case-insensitive). Supported options:" +
s" ${GBTRegressor.supportedLossTypes.mkString(", ")}",
@@ -108,9 +120,11 @@ final class GBTRegressor(override val uid: String)
setDefault(lossType -> "squared")

/** @group setParam */
@Since("1.4.0")
def setLossType(value: String): this.type = set(lossType, value)

/** @group getParam */
@Since("1.4.0")
def getLossType: String = $(lossType).toLowerCase

/** (private[ml]) Convert new loss to old loss. */
@@ -135,13 +149,16 @@ final class GBTRegressor(override val uid: String)
GBTRegressionModel.fromOld(oldModel, this, categoricalFeatures, numFeatures)
}

@Since("1.4.0")
override def copy(extra: ParamMap): GBTRegressor = defaultCopy(extra)
}

@Since("1.4.0")
@Experimental
object GBTRegressor {
// The losses below should be lowercase.
/** Accessor for supported loss settings: squared (L2), absolute (L1) */
@Since("1.4.0")
final val supportedLossTypes: Array[String] = Array("squared", "absolute").map(_.toLowerCase)
}

@@ -154,6 +171,7 @@ object GBTRegressor {
* @param _trees Decision trees in the ensemble.
* @param _treeWeights Weights for the decision trees in the ensemble.
*/
@Since("1.4.0")
@Experimental
final class GBTRegressionModel private[ml](
override val uid: String,
@@ -172,11 +190,14 @@ final class GBTRegressionModel private[ml](
* @param _trees Decision trees in the ensemble.
* @param _treeWeights Weights for the decision trees in the ensemble.
*/
@Since("1.4.0")
def this(uid: String, _trees: Array[DecisionTreeRegressionModel], _treeWeights: Array[Double]) =
this(uid, _trees, _treeWeights, -1)

@Since("1.4.0")
override def trees: Array[DecisionTreeModel] = _trees.asInstanceOf[Array[DecisionTreeModel]]

@Since("1.4.0")
override def treeWeights: Array[Double] = _treeWeights

override protected def transformImpl(dataset: DataFrame): DataFrame = {
@@ -194,11 +215,13 @@ final class GBTRegressionModel private[ml](
blas.ddot(numTrees, treePredictions, 1, _treeWeights, 1)
}

@Since("1.4.0")
override def copy(extra: ParamMap): GBTRegressionModel = {
copyValues(new GBTRegressionModel(uid, _trees, _treeWeights, numFeatures),
extra).setParent(parent)
}

@Since("1.4.0")
override def toString: String = {
s"GBTRegressionModel (uid=$uid) with $numTrees trees"
}
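A similar sketch for the gradient-boosted trees API (same illustrative `training` DataFrame assumed):

  import org.apache.spark.ml.regression.GBTRegressor

  val gbt = new GBTRegressor()
    .setMaxIter(10)            // number of boosting iterations (trees)
    .setStepSize(0.1)
    .setLossType("squared")    // or "absolute"; matched case-insensitively

  val model = gbt.fit(training)
  println(model)               // GBTRegressionModel (uid=...) with 10 trees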
mllib/src/main/scala/org/apache/spark/ml/regression/IsotonicRegression.scala
@@ -18,7 +18,7 @@
package org.apache.spark.ml.regression

import org.apache.spark.Logging
-import org.apache.spark.annotation.Experimental
+import org.apache.spark.annotation.{Experimental, Since}
import org.apache.spark.ml.{Estimator, Model}
import org.apache.spark.ml.param._
import org.apache.spark.ml.param.shared.{HasFeaturesCol, HasLabelCol, HasPredictionCol, HasWeightCol}
@@ -124,32 +124,42 @@ private[regression] trait IsotonicRegressionBase extends Params with HasFeatures
*
* Uses [[org.apache.spark.mllib.regression.IsotonicRegression]].
*/
@Since("1.5.0")
@Experimental
-class IsotonicRegression(override val uid: String) extends Estimator[IsotonicRegressionModel]
-  with IsotonicRegressionBase {
+class IsotonicRegression @Since("1.5.0") (@Since("1.5.0") override val uid: String)
+  extends Estimator[IsotonicRegressionModel] with IsotonicRegressionBase {

@Since("1.5.0")
def this() = this(Identifiable.randomUID("isoReg"))

/** @group setParam */
@Since("1.5.0")
def setLabelCol(value: String): this.type = set(labelCol, value)

/** @group setParam */
@Since("1.5.0")
def setFeaturesCol(value: String): this.type = set(featuresCol, value)

/** @group setParam */
@Since("1.5.0")
def setPredictionCol(value: String): this.type = set(predictionCol, value)

/** @group setParam */
@Since("1.5.0")
def setIsotonic(value: Boolean): this.type = set(isotonic, value)

/** @group setParam */
@Since("1.5.0")
def setWeightCol(value: String): this.type = set(weightCol, value)

/** @group setParam */
@Since("1.5.0")
def setFeatureIndex(value: Int): this.type = set(featureIndex, value)

@Since("1.5.0")
override def copy(extra: ParamMap): IsotonicRegression = defaultCopy(extra)

@Since("1.5.0")
override def fit(dataset: DataFrame): IsotonicRegressionModel = {
validateAndTransformSchema(dataset.schema, fitting = true)
// Extract columns from data. If dataset is persisted, do not persist oldDataset.
@@ -163,6 +173,7 @@ class IsotonicRegression(override val uid: String) extends Estimator[IsotonicReg
copyValues(new IsotonicRegressionModel(uid, oldModel).setParent(this))
}

@Since("1.5.0")
override def transformSchema(schema: StructType): StructType = {
validateAndTransformSchema(schema, fitting = true)
}
@@ -178,34 +189,42 @@ class IsotonicRegression(override val uid: String) extends Estimator[IsotonicReg
* @param oldModel A [[org.apache.spark.mllib.regression.IsotonicRegressionModel]]
* model trained by [[org.apache.spark.mllib.regression.IsotonicRegression]].
*/
@Since("1.5.0")
@Experimental
class IsotonicRegressionModel private[ml] (
override val uid: String,
private val oldModel: MLlibIsotonicRegressionModel)
extends Model[IsotonicRegressionModel] with IsotonicRegressionBase {

/** @group setParam */
@Since("1.5.0")
def setFeaturesCol(value: String): this.type = set(featuresCol, value)

/** @group setParam */
@Since("1.5.0")
def setPredictionCol(value: String): this.type = set(predictionCol, value)

/** @group setParam */
@Since("1.5.0")
def setFeatureIndex(value: Int): this.type = set(featureIndex, value)

/** Boundaries in increasing order for which predictions are known. */
@Since("1.5.0")
def boundaries: Vector = Vectors.dense(oldModel.boundaries)

/**
* Predictions associated with the boundaries at the same index, monotone because of isotonic
* regression.
*/
@Since("1.5.0")
def predictions: Vector = Vectors.dense(oldModel.predictions)

@Since("1.5.0")
override def copy(extra: ParamMap): IsotonicRegressionModel = {
copyValues(new IsotonicRegressionModel(uid, oldModel), extra).setParent(parent)
}

@Since("1.5.0")
override def transform(dataset: DataFrame): DataFrame = {
val predict = dataset.schema($(featuresCol)).dataType match {
case DoubleType =>
@@ -217,6 +236,7 @@ class IsotonicRegressionModel private[ml] (
dataset.withColumn($(predictionCol), predict(col($(featuresCol))))
}

@Since("1.5.0")
override def transformSchema(schema: StructType): StructType = {
validateAndTransformSchema(schema, fitting = false)
}
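And a minimal sketch for isotonic regression (again assuming an illustrative `training` DataFrame with "label" and "features" columns):

  import org.apache.spark.ml.regression.IsotonicRegression

  val ir = new IsotonicRegression()
    .setIsotonic(true)     // fit a monotonically increasing function
    .setFeatureIndex(0)    // which element to use if the features column holds vectors

  val model = ir.fit(training)
  println(model.boundaries)   // boundaries of the fitted piecewise-linear function
  println(model.predictions)  // predictions at those boundaries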