diff --git a/python/pyspark/ml/classification.py b/python/pyspark/ml/classification.py
index 5978d8f4d3a01..6702dce5545a9 100644
--- a/python/pyspark/ml/classification.py
+++ b/python/pyspark/ml/classification.py
@@ -34,6 +34,7 @@ class LogisticRegression(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredicti
                          HasRegParam, HasTol, HasProbabilityCol, HasRawPredictionCol):
     """
     Logistic regression.
+    Currently, this class only supports binary classification.
 
     >>> from pyspark.sql import Row
     >>> from pyspark.mllib.linalg import Vectors
@@ -96,8 +97,8 @@ def __init__(self, featuresCol="features", labelCol="label", predictionCol="pred
         #  is an L2 penalty. For alpha = 1, it is an L1 penalty.
         self.elasticNetParam = \
             Param(self, "elasticNetParam",
-                  "the ElasticNet mixing parameter, in range [0, 1]. For alpha = 0, the penalty " +
-                  "is an L2 penalty. For alpha = 1, it is an L1 penalty.")
+                  "the ElasticNet mixing parameter, in range [0, 1]. For alpha = 0, " +
+                  "the penalty is an L2 penalty. For alpha = 1, it is an L1 penalty.")
         #: param for whether to fit an intercept term.
         self.fitIntercept = Param(self, "fitIntercept", "whether to fit an intercept term.")
         #: param for threshold in binary classification prediction, in range [0, 1].
@@ -656,6 +657,13 @@ class NaiveBayes(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredictionCol, H
                  HasRawPredictionCol):
     """
     Naive Bayes Classifiers.
+    It supports both Multinomial and Bernoulli NB. Multinomial NB
+    (`http://nlp.stanford.edu/IR-book/html/htmledition/naive-bayes-text-classification-1.html`)
+    can handle finitely supported discrete data. For example, by converting documents into
+    TF-IDF vectors, it can be used for document classification. By making every vector a
+    binary (0/1) data, it can also be used as Bernoulli NB
+    (`http://nlp.stanford.edu/IR-book/html/htmledition/the-bernoulli-model-1.html`).
+    The input feature values must be nonnegative.
 
     >>> from pyspark.sql import Row
     >>> from pyspark.mllib.linalg import Vectors
diff --git a/python/pyspark/ml/clustering.py b/python/pyspark/ml/clustering.py
index b5e9b6549d9f1..48338713a29ea 100644
--- a/python/pyspark/ml/clustering.py
+++ b/python/pyspark/ml/clustering.py
@@ -37,7 +37,9 @@ def clusterCenters(self):
 @inherit_doc
 class KMeans(JavaEstimator, HasFeaturesCol, HasMaxIter, HasSeed):
     """
-    K-means Clustering
+    K-means clustering with support for multiple parallel runs and a k-means++ like initialization
+    mode (the k-means|| algorithm by Bahmani et al). When multiple concurrent runs are requested,
+    they are executed together with joint passes over the data for efficiency.
 
     >>> from pyspark.mllib.linalg import Vectors
     >>> data = [(Vectors.dense([0.0, 0.0]),), (Vectors.dense([1.0, 1.0]),),
diff --git a/python/pyspark/ml/evaluation.py b/python/pyspark/ml/evaluation.py
index 06e809352225b..2734092575ad9 100644
--- a/python/pyspark/ml/evaluation.py
+++ b/python/pyspark/ml/evaluation.py
@@ -23,7 +23,8 @@
 from pyspark.ml.util import keyword_only
 from pyspark.mllib.common import inherit_doc
 
-__all__ = ['Evaluator', 'BinaryClassificationEvaluator', 'RegressionEvaluator']
+__all__ = ['Evaluator', 'BinaryClassificationEvaluator', 'RegressionEvaluator',
+           'MulticlassClassificationEvaluator']
 
 
 @inherit_doc
diff --git a/python/pyspark/ml/feature.py b/python/pyspark/ml/feature.py
index cb4dfa21298ce..535d55326646c 100644
--- a/python/pyspark/ml/feature.py
+++ b/python/pyspark/ml/feature.py
@@ -26,10 +26,11 @@
 from pyspark.mllib.common import inherit_doc
 from pyspark.mllib.linalg import _convert_to_vector
 
-__all__ = ['Binarizer', 'HashingTF', 'IDF', 'IDFModel', 'NGram', 'Normalizer', 'OneHotEncoder',
-           'PolynomialExpansion', 'RegexTokenizer', 'StandardScaler', 'StandardScalerModel',
-           'StringIndexer', 'StringIndexerModel', 'Tokenizer', 'VectorAssembler', 'VectorIndexer',
-           'Word2Vec', 'Word2VecModel', 'PCA', 'PCAModel', 'RFormula', 'RFormulaModel']
+__all__ = ['Binarizer', 'Bucketizer', 'HashingTF', 'IDF', 'IDFModel', 'NGram', 'Normalizer',
+           'OneHotEncoder', 'PolynomialExpansion', 'RegexTokenizer', 'StandardScaler',
+           'StandardScalerModel', 'StringIndexer', 'StringIndexerModel', 'Tokenizer',
+           'VectorAssembler', 'VectorIndexer', 'Word2Vec', 'Word2VecModel', 'PCA',
+           'PCAModel', 'RFormula', 'RFormulaModel']
 
 
 @inherit_doc