Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 10 additions & 2 deletions python/pyspark/ml/classification.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ class LogisticRegression(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredicti
HasRegParam, HasTol, HasProbabilityCol, HasRawPredictionCol):
"""
Logistic regression.
Currently, this class only supports binary classification.

>>> from pyspark.sql import Row
>>> from pyspark.mllib.linalg import Vectors
Expand Down Expand Up @@ -96,8 +97,8 @@ def __init__(self, featuresCol="features", labelCol="label", predictionCol="pred
# is an L2 penalty. For alpha = 1, it is an L1 penalty.
self.elasticNetParam = \
Param(self, "elasticNetParam",
"the ElasticNet mixing parameter, in range [0, 1]. For alpha = 0, the penalty " +
"is an L2 penalty. For alpha = 1, it is an L1 penalty.")
"the ElasticNet mixing parameter, in range [0, 1]. For alpha = 0, " +
"the penalty is an L2 penalty. For alpha = 1, it is an L1 penalty.")
#: param for whether to fit an intercept term.
self.fitIntercept = Param(self, "fitIntercept", "whether to fit an intercept term.")
#: param for threshold in binary classification prediction, in range [0, 1].
Expand Down Expand Up @@ -656,6 +657,13 @@ class NaiveBayes(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredictionCol, H
HasRawPredictionCol):
"""
Naive Bayes Classifiers.
It supports both Multinomial and Bernoulli NB. Multinomial NB
(`http://nlp.stanford.edu/IR-book/html/htmledition/naive-bayes-text-classification-1.html`)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I have not tried generating this doc myself. How does this link render?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It will render this link in italics; I just followed the way other links are written in classification.py.

can handle finitely supported discrete data. For example, by converting documents into
TF-IDF vectors, it can be used for document classification. By making every vector a
binary (0/1) data, it can also be used as Bernoulli NB
(`http://nlp.stanford.edu/IR-book/html/htmledition/the-bernoulli-model-1.html`).
The input feature values must be nonnegative.

>>> from pyspark.sql import Row
>>> from pyspark.mllib.linalg import Vectors
Expand Down
4 changes: 3 additions & 1 deletion python/pyspark/ml/clustering.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,9 @@ def clusterCenters(self):
@inherit_doc
class KMeans(JavaEstimator, HasFeaturesCol, HasMaxIter, HasSeed):
"""
K-means Clustering
K-means clustering with support for multiple parallel runs and a k-means++ like initialization
mode (the k-means|| algorithm by Bahmani et al). When multiple concurrent runs are requested,
they are executed together with joint passes over the data for efficiency.

>>> from pyspark.mllib.linalg import Vectors
>>> data = [(Vectors.dense([0.0, 0.0]),), (Vectors.dense([1.0, 1.0]),),
Expand Down
3 changes: 2 additions & 1 deletion python/pyspark/ml/evaluation.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,8 @@
from pyspark.ml.util import keyword_only
from pyspark.mllib.common import inherit_doc

__all__ = ['Evaluator', 'BinaryClassificationEvaluator', 'RegressionEvaluator']
__all__ = ['Evaluator', 'BinaryClassificationEvaluator', 'RegressionEvaluator',
'MulticlassClassificationEvaluator']


@inherit_doc
Expand Down
9 changes: 5 additions & 4 deletions python/pyspark/ml/feature.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,10 +26,11 @@
from pyspark.mllib.common import inherit_doc
from pyspark.mllib.linalg import _convert_to_vector

__all__ = ['Binarizer', 'HashingTF', 'IDF', 'IDFModel', 'NGram', 'Normalizer', 'OneHotEncoder',
'PolynomialExpansion', 'RegexTokenizer', 'StandardScaler', 'StandardScalerModel',
'StringIndexer', 'StringIndexerModel', 'Tokenizer', 'VectorAssembler', 'VectorIndexer',
'Word2Vec', 'Word2VecModel', 'PCA', 'PCAModel', 'RFormula', 'RFormulaModel']
__all__ = ['Binarizer', 'Bucketizer', 'HashingTF', 'IDF', 'IDFModel', 'NGram', 'Normalizer',
'OneHotEncoder', 'PolynomialExpansion', 'RegexTokenizer', 'StandardScaler',
'StandardScalerModel', 'StringIndexer', 'StringIndexerModel', 'Tokenizer',
'VectorAssembler', 'VectorIndexer', 'Word2Vec', 'Word2VecModel', 'PCA',
'PCAModel', 'RFormula', 'RFormulaModel']


@inherit_doc
Expand Down