From f95744f2d5825c784b3c2caf74ab74dd62a1b40a Mon Sep 17 00:00:00 2001 From: Huaxin Gao Date: Wed, 22 Jan 2020 17:46:05 -0800 Subject: [PATCH 1/3] [MINOR][ML] Deprecate numTrees in GBT --- .../spark/ml/classification/GBTClassifier.scala | 11 ++++++++--- .../apache/spark/ml/regression/GBTRegressor.scala | 11 ++++++++--- .../ml/classification/GBTClassifierSuite.scala | 13 +++++++------ .../spark/ml/regression/GBTRegressorSuite.scala | 10 +++++----- 4 files changed, 28 insertions(+), 17 deletions(-) diff --git a/mllib/src/main/scala/org/apache/spark/ml/classification/GBTClassifier.scala b/mllib/src/main/scala/org/apache/spark/ml/classification/GBTClassifier.scala index 4044c0878921..67e00f599488 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/classification/GBTClassifier.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/classification/GBTClassifier.scala @@ -342,7 +342,12 @@ class GBTClassificationModel private[ml]( } } - /** Number of trees in ensemble */ + /** + * Number of trees in ensemble + * + * @deprecated Use [[getNumTrees]] instead. This method will be removed in 3.1.0 + */ + @deprecated("Use getNumTrees instead. This method will be removed in 3.1.0.", "3.0.0") val numTrees: Int = trees.length @Since("1.4.0") @@ -353,7 +358,7 @@ class GBTClassificationModel private[ml]( @Since("1.4.0") override def toString: String = { - s"GBTClassificationModel: uid = $uid, numTrees=$numTrees, numClasses=$numClasses, " + + s"GBTClassificationModel: uid = $uid, numTrees=$getNumTrees, numClasses=$numClasses, " + s"numFeatures=$numFeatures" } @@ -374,7 +379,7 @@ class GBTClassificationModel private[ml]( /** Raw prediction for the positive class. */ private def margin(features: Vector): Double = { val treePredictions = _trees.map(_.rootNode.predictImpl(features).prediction) - blas.ddot(numTrees, treePredictions, 1, _treeWeights, 1) + blas.ddot(getNumTrees, treePredictions, 1, _treeWeights, 1) } /** (private[ml]) Convert to a model in the old API */ diff --git a/mllib/src/main/scala/org/apache/spark/ml/regression/GBTRegressor.scala b/mllib/src/main/scala/org/apache/spark/ml/regression/GBTRegressor.scala index 29991f59e37c..9fa58e3095bf 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/regression/GBTRegressor.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/regression/GBTRegressor.scala @@ -300,10 +300,15 @@ class GBTRegressionModel private[ml]( // TODO: When we add a generic Boosting class, handle transform there? SPARK-7129 // Classifies by thresholding sum of weighted tree predictions val treePredictions = _trees.map(_.rootNode.predictImpl(features).prediction) - blas.ddot(numTrees, treePredictions, 1, _treeWeights, 1) + blas.ddot(getNumTrees, treePredictions, 1, _treeWeights, 1) } - /** Number of trees in ensemble */ + /** + * Number of trees in ensemble + * + * @deprecated Use [[getNumTrees]] instead. This method will be removed in 3.1.0 + */ + @deprecated("Use getNumTrees instead. This method will be removed in 3.1.0.", "3.0.0") val numTrees: Int = trees.length @Since("1.4.0") @@ -314,7 +319,7 @@ class GBTRegressionModel private[ml]( @Since("1.4.0") override def toString: String = { - s"GBTRegressionModel: uid=$uid, numTrees=$numTrees, numFeatures=$numFeatures" + s"GBTRegressionModel: uid=$uid, numTrees=$getNumTrees, numFeatures=$numFeatures" } /** diff --git a/mllib/src/test/scala/org/apache/spark/ml/classification/GBTClassifierSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/classification/GBTClassifierSuite.scala index abeb4b5331ac..a2208edcb839 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/classification/GBTClassifierSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/classification/GBTClassifierSuite.scala @@ -179,7 +179,8 @@ class GBTClassifierSuite extends MLTest with DefaultReadWriteTest { assert(raw.size === 2) // check that raw prediction is tree predictions dot tree weights val treePredictions = gbtModel.trees.map(_.rootNode.predictImpl(features).prediction) - val prediction = blas.ddot(gbtModel.numTrees, treePredictions, 1, gbtModel.treeWeights, 1) + val prediction = blas.ddot(gbtModel.getNumTrees, treePredictions, 1, + gbtModel.treeWeights, 1) assert(raw ~== Vectors.dense(-prediction, prediction) relTol eps) // Compare rawPrediction with probability @@ -436,9 +437,9 @@ class GBTClassifierSuite extends MLTest with DefaultReadWriteTest { gbt.setValidationIndicatorCol(validationIndicatorCol) val modelWithValidation = gbt.fit(trainDF.union(validationDF)) - assert(modelWithoutValidation.numTrees === numIter) + assert(modelWithoutValidation.getNumTrees === numIter) // early stop - assert(modelWithValidation.numTrees < numIter) + assert(modelWithValidation.getNumTrees < numIter) val (errorWithoutValidation, errorWithValidation) = { val remappedRdd = validationData.map { @@ -457,10 +458,10 @@ class GBTClassifierSuite extends MLTest with DefaultReadWriteTest { modelWithoutValidation.treeWeights, modelWithoutValidation.getOldLossType, OldAlgo.Classification) assert(evaluationArray.length === numIter) - assert(evaluationArray(modelWithValidation.numTrees) > - evaluationArray(modelWithValidation.numTrees - 1)) + assert(evaluationArray(modelWithValidation.getNumTrees) > + evaluationArray(modelWithValidation.getNumTrees - 1)) var i = 1 - while (i < modelWithValidation.numTrees) { + while (i < modelWithValidation.getNumTrees) { assert(evaluationArray(i) <= evaluationArray(i - 1)) i += 1 } diff --git a/mllib/src/test/scala/org/apache/spark/ml/regression/GBTRegressorSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/regression/GBTRegressorSuite.scala index 35c0fc9b02b1..04b0d4b8470f 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/regression/GBTRegressorSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/regression/GBTRegressorSuite.scala @@ -274,9 +274,9 @@ class GBTRegressorSuite extends MLTest with DefaultReadWriteTest { gbt.setValidationIndicatorCol(validationIndicatorCol) val modelWithValidation = gbt.fit(trainDF.union(validationDF)) - assert(modelWithoutValidation.numTrees === numIter) + assert(modelWithoutValidation.getNumTrees === numIter) // early stop - assert(modelWithValidation.numTrees < numIter) + assert(modelWithValidation.getNumTrees < numIter) val errorWithoutValidation = GradientBoostedTrees.computeWeightedError( validationData.map(_.toInstance), @@ -294,10 +294,10 @@ class GBTRegressorSuite extends MLTest with DefaultReadWriteTest { modelWithoutValidation.treeWeights, modelWithoutValidation.getOldLossType, OldAlgo.Regression) assert(evaluationArray.length === numIter) - assert(evaluationArray(modelWithValidation.numTrees) > - evaluationArray(modelWithValidation.numTrees - 1)) + assert(evaluationArray(modelWithValidation.getNumTrees) > + evaluationArray(modelWithValidation.getNumTrees - 1)) var i = 1 - while (i < modelWithValidation.numTrees) { + while (i < modelWithValidation.getNumTrees) { assert(evaluationArray(i) <= evaluationArray(i - 1)) i += 1 } From 8744e176e3cc523b8d1cb92cea82c2fb61a66044 Mon Sep 17 00:00:00 2001 From: Huaxin Gao Date: Thu, 23 Jan 2020 22:09:03 -0800 Subject: [PATCH 2/3] remove numTrees in GBT in 3.0.0 --- .../apache/spark/ml/classification/GBTClassifier.scala | 8 -------- .../org/apache/spark/ml/regression/GBTRegressor.scala | 8 -------- 2 files changed, 16 deletions(-) diff --git a/mllib/src/main/scala/org/apache/spark/ml/classification/GBTClassifier.scala b/mllib/src/main/scala/org/apache/spark/ml/classification/GBTClassifier.scala index 67e00f599488..46810bccc8e6 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/classification/GBTClassifier.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/classification/GBTClassifier.scala @@ -342,14 +342,6 @@ class GBTClassificationModel private[ml]( } } - /** - * Number of trees in ensemble - * - * @deprecated Use [[getNumTrees]] instead. This method will be removed in 3.1.0 - */ - @deprecated("Use getNumTrees instead. This method will be removed in 3.1.0.", "3.0.0") - val numTrees: Int = trees.length - @Since("1.4.0") override def copy(extra: ParamMap): GBTClassificationModel = { copyValues(new GBTClassificationModel(uid, _trees, _treeWeights, numFeatures, numClasses), diff --git a/mllib/src/main/scala/org/apache/spark/ml/regression/GBTRegressor.scala b/mllib/src/main/scala/org/apache/spark/ml/regression/GBTRegressor.scala index 9fa58e3095bf..2c2558f00bb1 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/regression/GBTRegressor.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/regression/GBTRegressor.scala @@ -303,14 +303,6 @@ class GBTRegressionModel private[ml]( blas.ddot(getNumTrees, treePredictions, 1, _treeWeights, 1) } - /** - * Number of trees in ensemble - * - * @deprecated Use [[getNumTrees]] instead. This method will be removed in 3.1.0 - */ - @deprecated("Use getNumTrees instead. This method will be removed in 3.1.0.", "3.0.0") - val numTrees: Int = trees.length - @Since("1.4.0") override def copy(extra: ParamMap): GBTRegressionModel = { copyValues(new GBTRegressionModel(uid, _trees, _treeWeights, numFeatures), From 0b818238bb576c1d8d2ea012f26281ddb2c9b5f5 Mon Sep 17 00:00:00 2001 From: Huaxin Gao Date: Thu, 23 Jan 2020 23:14:50 -0800 Subject: [PATCH 3/3] fix mima problem --- project/MimaExcludes.scala | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/project/MimaExcludes.scala b/project/MimaExcludes.scala index f1bbe0b10e22..68e9313805e1 100644 --- a/project/MimaExcludes.scala +++ b/project/MimaExcludes.scala @@ -344,6 +344,10 @@ object MimaExcludes { ProblemFilters.exclude[IncompatibleResultTypeProblem]("org.apache.spark.ml.classification.MultilayerPerceptronClassificationModel.layers"), ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.ml.classification.MultilayerPerceptronClassificationModel.this"), + // [SPARK-30630][ML] Remove numTrees in GBT + ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.ml.classification.GBTClassificationModel.numTrees"), + ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.ml.regression.GBTRegressionModel.numTrees"), + // Data Source V2 API changes (problem: Problem) => problem match { case MissingClassProblem(cls) =>