diff --git a/python/pyspark/ml/classification.py b/python/pyspark/ml/classification.py index d6d713ca5303..2ec22f23e3c3 100644 --- a/python/pyspark/ml/classification.py +++ b/python/pyspark/ml/classification.py @@ -65,6 +65,8 @@ class LogisticRegression(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredicti DenseVector([5.5...]) >>> model.intercept -2.68... + >>> model.numFeatures + 1 >>> test0 = sc.parallelize([Row(features=Vectors.dense(-1.0))]).toDF() >>> result = model.transform(test0).head() >>> result.prediction @@ -92,6 +94,8 @@ class LogisticRegression(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredicti True >>> model.intercept == model2.intercept True + >>> model.numFeatures == model2.numFeatures + True .. versionadded:: 1.3.0 """ @@ -239,6 +243,14 @@ def intercept(self): """ return self._call_java("intercept") + @property + @since("2.0.0") + def numFeatures(self): + """ + Number of features the model was trained on. + """ + return self._call_java("numFeatures") + @property @since("2.0.0") def summary(self): @@ -524,6 +536,8 @@ class DecisionTreeClassifier(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPred 1 >>> model.featureImportances SparseVector(1, {0: 1.0}) + >>> model.numFeatures + 1 >>> print(model.toDebugString) DecisionTreeClassificationModel (uid=...) of depth 1 with 3 nodes... >>> test0 = spark.createDataFrame([(Vectors.dense(-1.0),)], ["features"]) @@ -548,6 +562,8 @@ class DecisionTreeClassifier(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPred >>> model2 = DecisionTreeClassificationModel.load(model_path) >>> model.featureImportances == model2.featureImportances True + >>> model.numFeatures == model2.numFeatures + True .. versionadded:: 1.4.0 """ @@ -627,6 +643,14 @@ def featureImportances(self): """ return self._call_java("featureImportances") + @property + @since("2.0.0") + def numFeatures(self): + """ + Number of features the model was trained on. + """ + return self._call_java("numFeatures") + @inherit_doc class RandomForestClassifier(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredictionCol, HasSeed, @@ -668,6 +692,8 @@ class RandomForestClassifier(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPred >>> test1 = spark.createDataFrame([(Vectors.sparse(1, [0], [1.0]),)], ["features"]) >>> model.transform(test1).head().prediction 1.0 + >>> model.numFeatures + 1 >>> model.trees [DecisionTreeClassificationModel (uid=...) of depth..., DecisionTreeClassificationModel...] >>> rfc_path = temp_path + "/rfc" @@ -680,6 +706,8 @@ class RandomForestClassifier(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPred >>> model2 = RandomForestClassificationModel.load(model_path) >>> model.featureImportances == model2.featureImportances True + >>> model.numFeatures == model2.numFeatures + True .. versionadded:: 1.4.0 """ @@ -752,6 +780,14 @@ def featureImportances(self): """ return self._call_java("featureImportances") + @property + @since("2.0.0") + def numFeatures(self): + """ + Number of features the model was trained on. + """ + return self._call_java("numFeatures") + @property @since("2.0.0") def trees(self): @@ -804,6 +840,8 @@ class GBTClassifier(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredictionCol 1.0 >>> model.totalNumNodes 15 + >>> model.numFeatures + 1 >>> print(model.toDebugString) GBTClassificationModel (uid=...)...with 5 trees... >>> gbtc_path = temp_path + "gbtc" @@ -820,6 +858,8 @@ class GBTClassifier(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredictionCol True >>> model.trees [DecisionTreeRegressionModel (uid=...) of depth..., DecisionTreeRegressionModel...] + >>> model.numFeatures == model2.numFeatures + True .. versionadded:: 1.4.0 """ @@ -907,6 +947,14 @@ def featureImportances(self): """ return self._call_java("featureImportances") + @property + @since("2.0.0") + def numFeatures(self): + """ + Number of features the model was trained on. + """ + return self._call_java("numFeatures") + @property @since("2.0.0") def trees(self): @@ -952,6 +1000,8 @@ class NaiveBayes(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredictionCol, H >>> test1 = sc.parallelize([Row(features=Vectors.sparse(2, [0], [1.0]))]).toDF() >>> model.transform(test1).head().prediction 1.0 + >>> model.numFeatures + 2 >>> nb_path = temp_path + "/nb" >>> nb.save(nb_path) >>> nb2 = NaiveBayes.load(nb_path) @@ -969,6 +1019,8 @@ class NaiveBayes(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredictionCol, H >>> result = model3.transform(test0).head() >>> result.prediction 0.0 + >>> model.numFeatures == model2.numFeatures + True .. versionadded:: 1.5.0 """ @@ -1066,6 +1118,14 @@ def theta(self): """ return self._call_java("theta") + @property + @since("2.0.0") + def numFeatures(self): + """ + Number of features the model was trained on. + """ + return self._call_java("numFeatures") + @inherit_doc class MultilayerPerceptronClassifier(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredictionCol, @@ -1091,6 +1151,8 @@ class MultilayerPerceptronClassifier(JavaEstimator, HasFeaturesCol, HasLabelCol, [2, 2, 2] >>> model.weights.size 12 + >>> model.numFeatures + 2 >>> testDF = spark.createDataFrame([ ... (Vectors.dense([1.0, 0.0]),), ... (Vectors.dense([0.0, 0.0]),)], ["features"]) @@ -1120,6 +1182,8 @@ class MultilayerPerceptronClassifier(JavaEstimator, HasFeaturesCol, HasLabelCol, True >>> model3.layers == model.layers True + >>> model.numFeatures == model2.numFeatures + True .. versionadded:: 1.6.0 """ @@ -1267,6 +1331,14 @@ def weights(self): """ return self._call_java("weights") + @property + @since("2.0.0") + def numFeatures(self): + """ + Number of features the model was trained on. + """ + return self._call_java("numFeatures") + class OneVsRestParams(HasFeaturesCol, HasLabelCol, HasPredictionCol): """ diff --git a/python/pyspark/ml/regression.py b/python/pyspark/ml/regression.py index 29efd6a852e8..9cab65ad73bd 100644 --- a/python/pyspark/ml/regression.py +++ b/python/pyspark/ml/regression.py @@ -71,6 +71,8 @@ class LinearRegression(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPrediction True >>> abs(model.intercept - 0.0) < 0.001 True + >>> model.numFeatures + 1 >>> test1 = spark.createDataFrame([(Vectors.sparse(1, [0], [1.0]),)], ["features"]) >>> abs(model.transform(test1).head().prediction - 1.0) < 0.001 True @@ -90,6 +92,8 @@ class LinearRegression(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPrediction True >>> model.intercept == model2.intercept True + >>> model.numFeatures == model2.numFeatures + True .. versionadded:: 1.4.0 """ @@ -153,6 +157,14 @@ def intercept(self): """ return self._call_java("intercept") + @property + @since("2.0.0") + def numFeatures(self): + """ + Number of features the model was trained on. + """ + return self._call_java("numFeatures") + @property @since("2.0.0") def summary(self): @@ -658,6 +670,8 @@ class DecisionTreeRegressor(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredi 3 >>> model.featureImportances SparseVector(1, {0: 1.0}) + >>> model.numFeatures + 1 >>> test0 = spark.createDataFrame([(Vectors.dense(-1.0),)], ["features"]) >>> model.transform(test0).head().prediction 0.0 @@ -678,6 +692,8 @@ class DecisionTreeRegressor(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredi True >>> model.transform(test1).head().variance 0.0 + >>> model.numFeatures == model2.numFeatures + True .. versionadded:: 1.4.0 """ @@ -829,6 +845,14 @@ def featureImportances(self): """ return self._call_java("featureImportances") + @property + @since("2.0.0") + def numFeatures(self): + """ + Number of features the model was trained on. + """ + return self._call_java("numFeatures") + @inherit_doc class RandomForestRegressor(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredictionCol, HasSeed, @@ -850,6 +874,8 @@ class RandomForestRegressor(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredi >>> model = rf.fit(df) >>> model.featureImportances SparseVector(1, {0: 1.0}) + >>> model.numFeatures + 1 >>> allclose(model.treeWeights, [1.0, 1.0]) True >>> test0 = spark.createDataFrame([(Vectors.dense(-1.0),)], ["features"]) @@ -872,6 +898,8 @@ class RandomForestRegressor(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredi >>> model2 = RandomForestRegressionModel.load(model_path) >>> model.featureImportances == model2.featureImportances True + >>> model.numFeatures == model2.numFeatures + True .. versionadded:: 1.4.0 """ @@ -951,6 +979,14 @@ def featureImportances(self): """ return self._call_java("featureImportances") + @property + @since("2.0.0") + def numFeatures(self): + """ + Number of features the model was trained on. + """ + return self._call_java("numFeatures") + @inherit_doc class GBTRegressor(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredictionCol, HasMaxIter, @@ -974,6 +1010,8 @@ class GBTRegressor(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredictionCol, >>> model = gbt.fit(df) >>> model.featureImportances SparseVector(1, {0: 1.0}) + >>> model.numFeatures + 1 >>> allclose(model.treeWeights, [1.0, 0.1, 0.1, 0.1, 0.1]) True >>> test0 = spark.createDataFrame([(Vectors.dense(-1.0),)], ["features"]) @@ -996,6 +1034,8 @@ class GBTRegressor(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredictionCol, True >>> model.trees [DecisionTreeRegressionModel (uid=...) of depth..., DecisionTreeRegressionModel...] + >>> model.numFeatures == model2.numFeatures + True .. versionadded:: 1.4.0 """ @@ -1087,6 +1127,14 @@ def featureImportances(self): """ return self._call_java("featureImportances") + @property + @since("2.0.0") + def numFeatures(self): + """ + Number of features the model was trained on. + """ + return self._call_java("numFeatures") + @property @since("2.0.0") def trees(self):