Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 6 additions & 5 deletions python/pyspark/ml/classification.py
Original file line number Diff line number Diff line change
Expand Up @@ -758,20 +758,21 @@ def __init__(self, featuresCol="features", labelCol="label", predictionCol="pred
probabilityCol="probability", rawPredictionCol="rawPrediction",
maxDepth=5, maxBins=32, minInstancesPerNode=1, minInfoGain=0.0,
maxMemoryInMB=256, cacheNodeIds=False, checkpointInterval=10, impurity="gini",
numTrees=20, featureSubsetStrategy="auto", seed=None):
numTrees=20, featureSubsetStrategy="auto", seed=None, subsamplingRate=1.0):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Add some doc string tests?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

generally spark avoid having too many verbose tests in python doc string, so I think we are ok here.

"""
__init__(self, featuresCol="features", labelCol="label", predictionCol="prediction", \
probabilityCol="probability", rawPredictionCol="rawPrediction", \
maxDepth=5, maxBins=32, minInstancesPerNode=1, minInfoGain=0.0, \
maxMemoryInMB=256, cacheNodeIds=False, checkpointInterval=10, impurity="gini", \
numTrees=20, featureSubsetStrategy="auto", seed=None)
numTrees=20, featureSubsetStrategy="auto", seed=None, subsamplingRate=1.0)
"""
super(RandomForestClassifier, self).__init__()
self._java_obj = self._new_java_obj(
"org.apache.spark.ml.classification.RandomForestClassifier", self.uid)
self._setDefault(maxDepth=5, maxBins=32, minInstancesPerNode=1, minInfoGain=0.0,
maxMemoryInMB=256, cacheNodeIds=False, checkpointInterval=10,
impurity="gini", numTrees=20, featureSubsetStrategy="auto")
impurity="gini", numTrees=20, featureSubsetStrategy="auto",
subsamplingRate=1.0)
kwargs = self.__init__._input_kwargs
self.setParams(**kwargs)

Expand All @@ -781,13 +782,13 @@ def setParams(self, featuresCol="features", labelCol="label", predictionCol="pre
probabilityCol="probability", rawPredictionCol="rawPrediction",
maxDepth=5, maxBins=32, minInstancesPerNode=1, minInfoGain=0.0,
maxMemoryInMB=256, cacheNodeIds=False, checkpointInterval=10, seed=None,
impurity="gini", numTrees=20, featureSubsetStrategy="auto"):
impurity="gini", numTrees=20, featureSubsetStrategy="auto", subsamplingRate=1.0):
"""
setParams(self, featuresCol="features", labelCol="label", predictionCol="prediction", \
probabilityCol="probability", rawPredictionCol="rawPrediction", \
maxDepth=5, maxBins=32, minInstancesPerNode=1, minInfoGain=0.0, \
maxMemoryInMB=256, cacheNodeIds=False, checkpointInterval=10, seed=None, \
impurity="gini", numTrees=20, featureSubsetStrategy="auto")
impurity="gini", numTrees=20, featureSubsetStrategy="auto", subsamplingRate=1.0)
Sets params for linear classification.
"""
kwargs = self.setParams._input_kwargs
Expand Down
12 changes: 6 additions & 6 deletions python/pyspark/ml/regression.py
Original file line number Diff line number Diff line change
Expand Up @@ -594,7 +594,7 @@ class RandomForestParams(TreeEnsembleParams):
featureSubsetStrategy = \
Param(Params._dummy(), "featureSubsetStrategy",
"The number of features to consider for splits at each tree node. Supported " +
"options: " + ", ".join(supportedFeatureSubsetStrategies) + " (0.0-1.0], [1-n].",
"options: " + ", ".join(supportedFeatureSubsetStrategies) + ", (0.0-1.0], [1-n].",
typeConverter=TypeConverters.toString)

def __init__(self):
Expand Down Expand Up @@ -828,7 +828,7 @@ def featureImportances(self):
@inherit_doc
class RandomForestRegressor(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredictionCol, HasSeed,
RandomForestParams, TreeRegressorParams, HasCheckpointInterval,
JavaMLWritable, JavaMLReadable):
JavaMLWritable, JavaMLReadable, HasVarianceCol):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Would you like to group all the Has* parameters? Just a minor comment on the style.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

that doesn't seem to be the existing style, so adding to the end instead

"""
`Random Forest <http://en.wikipedia.org/wiki/Random_forest>`_
learning algorithm for regression.
Expand Down Expand Up @@ -876,13 +876,13 @@ def __init__(self, featuresCol="features", labelCol="label", predictionCol="pred
maxDepth=5, maxBins=32, minInstancesPerNode=1, minInfoGain=0.0,
maxMemoryInMB=256, cacheNodeIds=False, checkpointInterval=10,
impurity="variance", subsamplingRate=1.0, seed=None, numTrees=20,
featureSubsetStrategy="auto"):
featureSubsetStrategy="auto", varianceCol=None):
"""
__init__(self, featuresCol="features", labelCol="label", predictionCol="prediction", \
maxDepth=5, maxBins=32, minInstancesPerNode=1, minInfoGain=0.0, \
maxMemoryInMB=256, cacheNodeIds=False, checkpointInterval=10, \
impurity="variance", subsamplingRate=1.0, seed=None, numTrees=20, \
featureSubsetStrategy="auto")
featureSubsetStrategy="auto", varianceCol=None)
"""
super(RandomForestRegressor, self).__init__()
self._java_obj = self._new_java_obj(
Expand All @@ -900,13 +900,13 @@ def setParams(self, featuresCol="features", labelCol="label", predictionCol="pre
maxDepth=5, maxBins=32, minInstancesPerNode=1, minInfoGain=0.0,
maxMemoryInMB=256, cacheNodeIds=False, checkpointInterval=10,
impurity="variance", subsamplingRate=1.0, seed=None, numTrees=20,
featureSubsetStrategy="auto"):
featureSubsetStrategy="auto", varianceCol=None):
"""
setParams(self, featuresCol="features", labelCol="label", predictionCol="prediction", \
maxDepth=5, maxBins=32, minInstancesPerNode=1, minInfoGain=0.0, \
maxMemoryInMB=256, cacheNodeIds=False, checkpointInterval=10, \
impurity="variance", subsamplingRate=1.0, seed=None, numTrees=20, \
featureSubsetStrategy="auto")
featureSubsetStrategy="auto", varianceCol=None)
Sets params for linear regression.
"""
kwargs = self.setParams._input_kwargs
Expand Down