diff --git a/mllib/src/main/scala/org/apache/spark/ml/Pipeline.scala b/mllib/src/main/scala/org/apache/spark/ml/Pipeline.scala index a1d08b3a6e78..d18fb697994f 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/Pipeline.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/Pipeline.scala @@ -27,7 +27,7 @@ import org.json4s._ import org.json4s.jackson.JsonMethods._ import org.apache.spark.SparkContext -import org.apache.spark.annotation.{DeveloperApi, Experimental, Since} +import org.apache.spark.annotation.{DeveloperApi, Since} import org.apache.spark.internal.Logging import org.apache.spark.ml.param.{Param, ParamMap, Params} import org.apache.spark.ml.util._ @@ -78,7 +78,6 @@ abstract class PipelineStage extends Params with Logging { } /** - * :: Experimental :: * A simple pipeline, which acts as an estimator. A Pipeline consists of a sequence of stages, each * of which is either an [[Estimator]] or a [[Transformer]]. When [[Pipeline#fit]] is called, the * stages are executed in order. If a stage is an [[Estimator]], its [[Estimator#fit]] method will @@ -90,7 +89,6 @@ abstract class PipelineStage extends Params with Logging { * an identity transformer. */ @Since("1.2.0") -@Experimental class Pipeline @Since("1.4.0") ( @Since("1.4.0") override val uid: String) extends Estimator[PipelineModel] with MLWritable { @@ -282,11 +280,9 @@ object Pipeline extends MLReadable[Pipeline] { } /** - * :: Experimental :: * Represents a fitted pipeline. */ @Since("1.2.0") -@Experimental class PipelineModel private[ml] ( @Since("1.4.0") override val uid: String, @Since("1.4.0") val stages: Array[Transformer]) diff --git a/mllib/src/main/scala/org/apache/spark/ml/classification/DecisionTreeClassifier.scala b/mllib/src/main/scala/org/apache/spark/ml/classification/DecisionTreeClassifier.scala index c65d3d5b5442..082848c9ded5 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/classification/DecisionTreeClassifier.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/classification/DecisionTreeClassifier.scala @@ -21,7 +21,7 @@ import org.apache.hadoop.fs.Path import org.json4s.{DefaultFormats, JObject} import org.json4s.JsonDSL._ -import org.apache.spark.annotation.{Experimental, Since} +import org.apache.spark.annotation.Since import org.apache.spark.ml.feature.LabeledPoint import org.apache.spark.ml.linalg.{DenseVector, SparseVector, Vector, Vectors} import org.apache.spark.ml.param.ParamMap @@ -36,14 +36,12 @@ import org.apache.spark.sql.Dataset /** - * :: Experimental :: * [[http://en.wikipedia.org/wiki/Decision_tree_learning Decision tree]] learning algorithm * for classification. * It supports both binary and multiclass labels, as well as both continuous and categorical * features. */ @Since("1.4.0") -@Experimental class DecisionTreeClassifier @Since("1.4.0") ( @Since("1.4.0") override val uid: String) extends ProbabilisticClassifier[Vector, DecisionTreeClassifier, DecisionTreeClassificationModel] @@ -127,7 +125,6 @@ class DecisionTreeClassifier @Since("1.4.0") ( } @Since("1.4.0") -@Experimental object DecisionTreeClassifier extends DefaultParamsReadable[DecisionTreeClassifier] { /** Accessor for supported impurities: entropy, gini */ @Since("1.4.0") @@ -138,13 +135,11 @@ object DecisionTreeClassifier extends DefaultParamsReadable[DecisionTreeClassifi } /** - * :: Experimental :: * [[http://en.wikipedia.org/wiki/Decision_tree_learning Decision tree]] model for classification. * It supports both binary and multiclass labels, as well as both continuous and categorical * features. 
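Not part of the patch: with Pipeline, PipelineModel, and DecisionTreeClassifier now GA, a minimal end-to-end sketch; the `training` DataFrame and its column names are placeholder assumptions.

    import org.apache.spark.ml.Pipeline
    import org.apache.spark.ml.classification.DecisionTreeClassifier
    import org.apache.spark.ml.feature.StringIndexer

    // `training` has a string "label" column and a Vector "features" column.
    // DecisionTreeClassifier needs nominal label metadata, which StringIndexer attaches.
    val labelIndexer = new StringIndexer()
      .setInputCol("label")
      .setOutputCol("indexedLabel")
    val dt = new DecisionTreeClassifier()
      .setLabelCol("indexedLabel")
      .setFeaturesCol("features")
    val model = new Pipeline().setStages(Array(labelIndexer, dt)).fit(training)
    model.write.overwrite().save("/tmp/dt-pipeline") // PipelineModel is MLWritable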
*/ @Since("1.4.0") -@Experimental class DecisionTreeClassificationModel private[ml] ( @Since("1.4.0")override val uid: String, @Since("1.4.0")override val rootNode: Node, diff --git a/mllib/src/main/scala/org/apache/spark/ml/classification/GBTClassifier.scala b/mllib/src/main/scala/org/apache/spark/ml/classification/GBTClassifier.scala index 4e534baddc63..5946a12933ff 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/classification/GBTClassifier.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/classification/GBTClassifier.scala @@ -21,7 +21,7 @@ import com.github.fommil.netlib.BLAS.{getInstance => blas} import org.json4s.{DefaultFormats, JObject} import org.json4s.JsonDSL._ -import org.apache.spark.annotation.{Experimental, Since} +import org.apache.spark.annotation.Since import org.apache.spark.internal.Logging import org.apache.spark.ml.{PredictionModel, Predictor} import org.apache.spark.ml.feature.LabeledPoint @@ -40,7 +40,6 @@ import org.apache.spark.sql.functions._ import org.apache.spark.sql.types.DoubleType /** - * :: Experimental :: * [[http://en.wikipedia.org/wiki/Gradient_boosting Gradient-Boosted Trees (GBTs)]] * learning algorithm for classification. * It supports binary labels, as well as both continuous and categorical features. @@ -57,7 +56,6 @@ import org.apache.spark.sql.types.DoubleType * [https://issues.apache.org/jira/browse/SPARK-4240] */ @Since("1.4.0") -@Experimental class GBTClassifier @Since("1.4.0") ( @Since("1.4.0") override val uid: String) extends Predictor[Vector, GBTClassifier, GBTClassificationModel] @@ -149,7 +147,6 @@ class GBTClassifier @Since("1.4.0") ( } @Since("1.4.0") -@Experimental object GBTClassifier extends DefaultParamsReadable[GBTClassifier] { /** Accessor for supported loss settings: logistic */ @@ -161,7 +158,6 @@ object GBTClassifier extends DefaultParamsReadable[GBTClassifier] { } /** - * :: Experimental :: * [[http://en.wikipedia.org/wiki/Gradient_boosting Gradient-Boosted Trees (GBTs)]] * model for classification. * It supports binary labels, as well as both continuous and categorical features. @@ -171,7 +167,6 @@ object GBTClassifier extends DefaultParamsReadable[GBTClassifier] { * @param _treeWeights Weights for the decision trees in the ensemble. */ @Since("1.6.0") -@Experimental class GBTClassificationModel private[ml]( @Since("1.6.0") override val uid: String, private val _trees: Array[DecisionTreeRegressionModel], diff --git a/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala b/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala index 9c9f5ced4e35..e157bdeb5b7e 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala @@ -151,13 +151,11 @@ private[classification] trait LogisticRegressionParams extends ProbabilisticClas } /** - * :: Experimental :: * Logistic regression. * Currently, this class only supports binary classification. It will support multiclass * in the future. */ @Since("1.2.0") -@Experimental class LogisticRegression @Since("1.2.0") ( @Since("1.4.0") override val uid: String) extends ProbabilisticClassifier[Vector, LogisticRegression, LogisticRegressionModel] @@ -475,11 +473,9 @@ object LogisticRegression extends DefaultParamsReadable[LogisticRegression] { } /** - * :: Experimental :: * Model produced by [[LogisticRegression]]. 
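A minimal sketch of the now-stable LogisticRegression API (not part of the patch; assumes an active SparkSession in scope as `spark`, toy data for illustration):

    import org.apache.spark.ml.classification.LogisticRegression
    import org.apache.spark.ml.linalg.Vectors
    import spark.implicits._

    val training = Seq(
      (1.0, Vectors.dense(0.0, 1.1, 0.1)),
      (0.0, Vectors.dense(2.0, 1.0, -1.0)),
      (1.0, Vectors.dense(0.0, 1.2, -0.5))
    ).toDF("label", "features")

    // Binary classification only, as the class doc above notes.
    val model = new LogisticRegression().setMaxIter(10).setRegParam(0.01).fit(training)
    println(s"coefficients=${model.coefficients} intercept=${model.intercept}")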
*/ @Since("1.4.0") -@Experimental class LogisticRegressionModel private[spark] ( @Since("1.4.0") override val uid: String, @Since("2.0.0") val coefficients: Vector, diff --git a/mllib/src/main/scala/org/apache/spark/ml/classification/NaiveBayes.scala b/mllib/src/main/scala/org/apache/spark/ml/classification/NaiveBayes.scala index c99ae30155e3..ab977c8802e3 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/classification/NaiveBayes.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/classification/NaiveBayes.scala @@ -20,7 +20,7 @@ package org.apache.spark.ml.classification import org.apache.hadoop.fs.Path import org.apache.spark.SparkException -import org.apache.spark.annotation.{Experimental, Since} +import org.apache.spark.annotation.Since import org.apache.spark.ml.PredictorParams import org.apache.spark.ml.linalg._ import org.apache.spark.ml.param.{DoubleParam, Param, ParamMap, ParamValidators} @@ -63,7 +63,6 @@ private[ml] trait NaiveBayesParams extends PredictorParams { } /** - * :: Experimental :: * Naive Bayes Classifiers. * It supports both Multinomial NB * ([[http://nlp.stanford.edu/IR-book/html/htmledition/naive-bayes-text-classification-1.html]]) @@ -74,7 +73,6 @@ private[ml] trait NaiveBayesParams extends PredictorParams { * The input feature values must be nonnegative. */ @Since("1.5.0") -@Experimental class NaiveBayes @Since("1.5.0") ( @Since("1.5.0") override val uid: String) extends ProbabilisticClassifier[Vector, NaiveBayes, NaiveBayesModel] @@ -121,14 +119,12 @@ object NaiveBayes extends DefaultParamsReadable[NaiveBayes] { } /** - * :: Experimental :: * Model produced by [[NaiveBayes]] * @param pi log of class priors, whose dimension is C (number of classes) * @param theta log of class conditional probabilities, whose dimension is C (number of classes) * by D (number of features) */ @Since("1.5.0") -@Experimental class NaiveBayesModel private[ml] ( @Since("1.5.0") override val uid: String, @Since("2.0.0") val pi: Vector, diff --git a/mllib/src/main/scala/org/apache/spark/ml/classification/OneVsRest.scala b/mllib/src/main/scala/org/apache/spark/ml/classification/OneVsRest.scala index 047a378b79aa..f4ab0a074c42 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/classification/OneVsRest.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/classification/OneVsRest.scala @@ -29,7 +29,7 @@ import org.json4s.JsonDSL._ import org.json4s.jackson.JsonMethods._ import org.apache.spark.SparkContext -import org.apache.spark.annotation.{Experimental, Since} +import org.apache.spark.annotation.Since import org.apache.spark.ml._ import org.apache.spark.ml.attribute._ import org.apache.spark.ml.linalg.Vector @@ -117,7 +117,6 @@ private[ml] object OneVsRestParams extends ClassifierTypeTrait { } /** - * :: Experimental :: * Model produced by [[OneVsRest]]. * This stores the models resulting from training k binary classifiers: one for each class. * Each example is scored against all k models, and the model with the highest score @@ -130,7 +129,6 @@ private[ml] object OneVsRestParams extends ClassifierTypeTrait { * (taking label 0). */ @Since("1.4.0") -@Experimental final class OneVsRestModel private[ml] ( @Since("1.4.0") override val uid: String, private[ml] val labelMetadata: Metadata, @@ -260,8 +258,6 @@ object OneVsRestModel extends MLReadable[OneVsRestModel] { } /** - * :: Experimental :: - * * Reduction of Multiclass Classification to Binary Classification. * Performs reduction using one against all strategy. 
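A minimal sketch of the one-against-all reduction described above (not part of the patch; the `multiclassData` DataFrame with "label" and "features" columns is an assumption):

    import org.apache.spark.ml.classification.{LogisticRegression, OneVsRest}

    // Any binary Classifier can serve as the base; logistic regression shown here.
    val ovr = new OneVsRest().setClassifier(new LogisticRegression().setMaxIter(10))
    val ovrModel = ovr.fit(multiclassData)          // trains k binary models
    val scored = ovrModel.transform(multiclassData) // highest-scoring model labels each row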
* For a multiclass classification with k classes, train k models (one per class). @@ -269,7 +265,6 @@ object OneVsRestModel extends MLReadable[OneVsRestModel] { * is picked to label the example. */ @Since("1.4.0") -@Experimental final class OneVsRest @Since("1.4.0") ( @Since("1.4.0") override val uid: String) extends Estimator[OneVsRestModel] with OneVsRestParams with MLWritable { diff --git a/mllib/src/main/scala/org/apache/spark/ml/classification/RandomForestClassifier.scala b/mllib/src/main/scala/org/apache/spark/ml/classification/RandomForestClassifier.scala index 9a26a5c5b143..4ab132e5f294 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/classification/RandomForestClassifier.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/classification/RandomForestClassifier.scala @@ -20,7 +20,7 @@ package org.apache.spark.ml.classification import org.json4s.{DefaultFormats, JObject} import org.json4s.JsonDSL._ -import org.apache.spark.annotation.{Experimental, Since} +import org.apache.spark.annotation.Since import org.apache.spark.ml.feature.LabeledPoint import org.apache.spark.ml.linalg.{DenseVector, SparseVector, Vector, Vectors} import org.apache.spark.ml.param.ParamMap @@ -36,14 +36,12 @@ import org.apache.spark.sql.functions._ /** - * :: Experimental :: * [[http://en.wikipedia.org/wiki/Random_forest Random Forest]] learning algorithm for * classification. * It supports both binary and multiclass labels, as well as both continuous and categorical * features. */ @Since("1.4.0") -@Experimental class RandomForestClassifier @Since("1.4.0") ( @Since("1.4.0") override val uid: String) extends ProbabilisticClassifier[Vector, RandomForestClassifier, RandomForestClassificationModel] @@ -124,7 +122,6 @@ class RandomForestClassifier @Since("1.4.0") ( } @Since("1.4.0") -@Experimental object RandomForestClassifier extends DefaultParamsReadable[RandomForestClassifier] { /** Accessor for supported impurity settings: entropy, gini */ @Since("1.4.0") @@ -140,7 +137,6 @@ object RandomForestClassifier extends DefaultParamsReadable[RandomForestClassifi } /** - * :: Experimental :: * [[http://en.wikipedia.org/wiki/Random_forest Random Forest]] model for classification. * It supports both binary and multiclass labels, as well as both continuous and categorical * features. @@ -149,7 +145,6 @@ object RandomForestClassifier extends DefaultParamsReadable[RandomForestClassifi * Warning: These have null parents. */ @Since("1.4.0") -@Experimental class RandomForestClassificationModel private[ml] ( @Since("1.5.0") override val uid: String, private val _trees: Array[DecisionTreeClassificationModel], diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/Binarizer.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/Binarizer.scala index fa9634fdfa7e..2b0862c60fdf 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/Binarizer.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/Binarizer.scala @@ -19,7 +19,7 @@ package org.apache.spark.ml.feature import scala.collection.mutable.ArrayBuilder -import org.apache.spark.annotation.{Experimental, Since} +import org.apache.spark.annotation.Since import org.apache.spark.ml.Transformer import org.apache.spark.ml.attribute.BinaryAttribute import org.apache.spark.ml.linalg._ @@ -31,10 +31,8 @@ import org.apache.spark.sql.functions._ import org.apache.spark.sql.types._ /** - * :: Experimental :: * Binarize a column of continuous features given a threshold. 
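A minimal Binarizer sketch (not part of the patch; assumes an active SparkSession `spark`):

    import org.apache.spark.ml.feature.Binarizer
    import spark.implicits._

    val df = Seq((0, 0.1), (1, 0.8), (2, 0.4)).toDF("id", "feature")
    val binarizer = new Binarizer()
      .setInputCol("feature")
      .setOutputCol("binarized_feature")
      .setThreshold(0.5)
    binarizer.transform(df).show() // 0.8 -> 1.0; values <= 0.5 -> 0.0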
*/ -@Experimental @Since("1.4.0") final class Binarizer @Since("1.4.0") (@Since("1.4.0") override val uid: String) extends Transformer with HasInputCol with HasOutputCol with DefaultParamsWritable { diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/Bucketizer.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/Bucketizer.scala index caffc39e2be1..100d9e7f6cbc 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/Bucketizer.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/Bucketizer.scala @@ -20,7 +20,7 @@ package org.apache.spark.ml.feature import java.{util => ju} import org.apache.spark.SparkException -import org.apache.spark.annotation.{Experimental, Since} +import org.apache.spark.annotation.Since import org.apache.spark.ml.Model import org.apache.spark.ml.attribute.NominalAttribute import org.apache.spark.ml.param._ @@ -31,10 +31,8 @@ import org.apache.spark.sql.functions._ import org.apache.spark.sql.types.{DoubleType, StructField, StructType} /** - * :: Experimental :: * `Bucketizer` maps a column of continuous features to a column of feature buckets. */ -@Experimental @Since("1.4.0") final class Bucketizer @Since("1.4.0") (@Since("1.4.0") override val uid: String) extends Model[Bucketizer] with HasInputCol with HasOutputCol with DefaultParamsWritable { diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/ChiSqSelector.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/ChiSqSelector.scala index 712634dffbf1..bd053e886f17 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/ChiSqSelector.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/ChiSqSelector.scala @@ -19,7 +19,7 @@ package org.apache.spark.ml.feature import org.apache.hadoop.fs.Path -import org.apache.spark.annotation.{Experimental, Since} +import org.apache.spark.annotation.Since import org.apache.spark.ml._ import org.apache.spark.ml.attribute.{AttributeGroup, _} import org.apache.spark.ml.linalg.{Vector, VectorUDT} @@ -57,11 +57,9 @@ private[feature] trait ChiSqSelectorParams extends Params } /** - * :: Experimental :: * Chi-Squared feature selection, which selects categorical features to use for predicting a * categorical label. */ -@Experimental @Since("1.6.0") final class ChiSqSelector @Since("1.6.0") (@Since("1.6.0") override val uid: String) extends Estimator[ChiSqSelectorModel] with ChiSqSelectorParams with DefaultParamsWritable { @@ -116,10 +114,8 @@ object ChiSqSelector extends DefaultParamsReadable[ChiSqSelector] { } /** - * :: Experimental :: * Model fitted by [[ChiSqSelector]]. 
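A minimal ChiSqSelector sketch (not part of the patch; the `labeled` DataFrame with categorical "features" and a "label" column is an assumption):

    import org.apache.spark.ml.feature.ChiSqSelector

    val selector = new ChiSqSelector()
      .setNumTopFeatures(1) // keep the feature most predictive of the label
      .setFeaturesCol("features")
      .setLabelCol("label")
      .setOutputCol("selectedFeatures")
    val selected = selector.fit(labeled).transform(labeled)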
*/ -@Experimental @Since("1.6.0") final class ChiSqSelectorModel private[ml] ( @Since("1.6.0") override val uid: String, diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/CountVectorizer.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/CountVectorizer.scala index 96e6f1c512e9..6299f74a6bf9 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/CountVectorizer.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/CountVectorizer.scala @@ -18,7 +18,7 @@ package org.apache.spark.ml.feature import org.apache.hadoop.fs.Path -import org.apache.spark.annotation.{Experimental, Since} +import org.apache.spark.annotation.Since import org.apache.spark.broadcast.Broadcast import org.apache.spark.ml.{Estimator, Model} import org.apache.spark.ml.linalg.{Vectors, VectorUDT} @@ -116,10 +116,8 @@ private[feature] trait CountVectorizerParams extends Params with HasInputCol wit } /** - * :: Experimental :: * Extracts a vocabulary from document collections and generates a [[CountVectorizerModel]]. */ -@Experimental @Since("1.5.0") class CountVectorizer @Since("1.5.0") (@Since("1.5.0") override val uid: String) extends Estimator[CountVectorizerModel] with CountVectorizerParams with DefaultParamsWritable { @@ -201,11 +199,9 @@ object CountVectorizer extends DefaultParamsReadable[CountVectorizer] { } /** - * :: Experimental :: * Converts a text document to a sparse vector of token counts. * @param vocabulary An Array over terms. Only the terms in the vocabulary will be counted. */ -@Experimental @Since("1.5.0") class CountVectorizerModel( @Since("1.5.0") override val uid: String, diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/DCT.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/DCT.scala index 9605145e12c2..6ff36b35ca4c 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/DCT.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/DCT.scala @@ -19,7 +19,7 @@ package org.apache.spark.ml.feature import edu.emory.mathcs.jtransforms.dct._ -import org.apache.spark.annotation.{Experimental, Since} +import org.apache.spark.annotation.Since import org.apache.spark.ml.UnaryTransformer import org.apache.spark.ml.linalg.{Vector, Vectors, VectorUDT} import org.apache.spark.ml.param.BooleanParam @@ -27,7 +27,6 @@ import org.apache.spark.ml.util._ import org.apache.spark.sql.types.DataType /** - * :: Experimental :: * A feature transformer that takes the 1D discrete cosine transform of a real vector. No zero * padding is performed on the input vector. * It returns a real vector of the same length representing the DCT. The return vector is scaled @@ -35,7 +34,6 @@ import org.apache.spark.sql.types.DataType * * More information on [[https://en.wikipedia.org/wiki/Discrete_cosine_transform#DCT-II Wikipedia]]. 
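A minimal DCT sketch (not part of the patch; assumes an active SparkSession `spark`):

    import org.apache.spark.ml.feature.DCT
    import org.apache.spark.ml.linalg.Vectors
    import spark.implicits._

    val df = Seq(Vectors.dense(0.0, 1.0, -2.0, 3.0)).map(Tuple1.apply).toDF("features")
    val dct = new DCT()
      .setInputCol("features")
      .setOutputCol("featuresDCT")
      .setInverse(false) // forward DCT-II, per the doc above
    dct.transform(df).select("featuresDCT").show(false)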
*/ -@Experimental @Since("1.5.0") class DCT @Since("1.5.0") (@Since("1.5.0") override val uid: String) extends UnaryTransformer[Vector, Vector, DCT] with DefaultParamsWritable { diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/ElementwiseProduct.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/ElementwiseProduct.scala index d07833e5805d..f860b3a787b4 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/ElementwiseProduct.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/ElementwiseProduct.scala @@ -17,7 +17,7 @@ package org.apache.spark.ml.feature -import org.apache.spark.annotation.{Experimental, Since} +import org.apache.spark.annotation.Since import org.apache.spark.ml.UnaryTransformer import org.apache.spark.ml.linalg.{Vector, VectorUDT} import org.apache.spark.ml.param.Param @@ -27,12 +27,10 @@ import org.apache.spark.mllib.linalg.VectorImplicits._ import org.apache.spark.sql.types.DataType /** - * :: Experimental :: * Outputs the Hadamard product (i.e., the element-wise product) of each input vector with a * provided "weight" vector. In other words, it scales each column of the dataset by a scalar * multiplier. */ -@Experimental @Since("1.4.0") class ElementwiseProduct @Since("1.4.0") (@Since("1.4.0") override val uid: String) extends UnaryTransformer[Vector, Vector, ElementwiseProduct] with DefaultParamsWritable { diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/HashingTF.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/HashingTF.scala index 6ca7336cd048..a8792a35ff4a 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/HashingTF.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/HashingTF.scala @@ -17,7 +17,7 @@ package org.apache.spark.ml.feature -import org.apache.spark.annotation.{Experimental, Since} +import org.apache.spark.annotation.Since import org.apache.spark.ml.Transformer import org.apache.spark.ml.attribute.AttributeGroup import org.apache.spark.ml.param._ @@ -29,7 +29,6 @@ import org.apache.spark.sql.functions.{col, udf} import org.apache.spark.sql.types.{ArrayType, StructType} /** - * :: Experimental :: * Maps a sequence of terms to their term frequencies using the hashing trick. * Currently we use Austin Appleby's MurmurHash 3 algorithm (MurmurHash3_x86_32) * to calculate the hash code value for the term object. @@ -37,7 +36,6 @@ import org.apache.spark.sql.types.{ArrayType, StructType} * it is advisable to use a power of two as the numFeatures parameter; * otherwise the features will not be mapped evenly to the columns. 
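A minimal HashingTF sketch, using the power-of-two numFeatures the doc above recommends (not part of the patch; assumes an active SparkSession `spark`):

    import org.apache.spark.ml.feature.{HashingTF, Tokenizer}
    import spark.implicits._

    val sentences = Seq((0, "spark ml pipelines are now stable")).toDF("id", "sentence")
    val words = new Tokenizer().setInputCol("sentence").setOutputCol("words").transform(sentences)
    val tf = new HashingTF()
      .setInputCol("words")
      .setOutputCol("features")
      .setNumFeatures(1 << 10) // a power of two keeps the hash mapping even
    tf.transform(words).select("features").show(false)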
*/ -@Experimental @Since("1.2.0") class HashingTF @Since("1.4.0") (@Since("1.4.0") override val uid: String) extends Transformer with HasInputCol with HasOutputCol with DefaultParamsWritable { diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/IDF.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/IDF.scala index 5d6287f0e3f1..6386dd8a1080 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/IDF.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/IDF.scala @@ -19,7 +19,7 @@ package org.apache.spark.ml.feature import org.apache.hadoop.fs.Path -import org.apache.spark.annotation.{Experimental, Since} +import org.apache.spark.annotation.Since import org.apache.spark.ml._ import org.apache.spark.ml.linalg.{Vector, VectorUDT} import org.apache.spark.ml.param._ @@ -61,10 +61,8 @@ private[feature] trait IDFBase extends Params with HasInputCol with HasOutputCol } /** - * :: Experimental :: * Compute the Inverse Document Frequency (IDF) given a collection of documents. */ -@Experimental @Since("1.4.0") final class IDF @Since("1.4.0") (@Since("1.4.0") override val uid: String) extends Estimator[IDFModel] with IDFBase with DefaultParamsWritable { @@ -111,10 +109,8 @@ object IDF extends DefaultParamsReadable[IDF] { } /** - * :: Experimental :: * Model fitted by [[IDF]]. */ -@Experimental @Since("1.4.0") class IDFModel private[ml] ( @Since("1.4.0") override val uid: String, diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/Interaction.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/Interaction.scala index dca28b5c5d34..7b11f86279b9 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/Interaction.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/Interaction.scala @@ -20,7 +20,7 @@ package org.apache.spark.ml.feature import scala.collection.mutable.ArrayBuilder import org.apache.spark.SparkException -import org.apache.spark.annotation.{Experimental, Since} +import org.apache.spark.annotation.Since import org.apache.spark.ml.attribute._ import org.apache.spark.ml.param._ import org.apache.spark.ml.param.shared._ @@ -32,7 +32,6 @@ import org.apache.spark.sql.functions._ import org.apache.spark.sql.types._ /** - * :: Experimental :: * Implements the feature interaction transform. This transformer takes in Double and Vector type * columns and outputs a flattened vector of their feature interactions. To handle interaction, * we first one-hot encode any nominal features. Then, a vector of the feature cross-products is @@ -42,7 +41,6 @@ import org.apache.spark.sql.types._ * `Vector(6, 8)` if all input features were numeric. If the first feature was instead nominal * with four categories, the output would then be `Vector(0, 0, 0, 0, 3, 4, 0, 0)`. */ -@Experimental @Since("1.6.0") class Interaction @Since("1.6.0") (@Since("1.6.0") override val uid: String) extends Transformer with HasInputCols with HasOutputCol with DefaultParamsWritable { diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/LabeledPoint.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/LabeledPoint.scala index f7f1d4203959..6cefa7086c88 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/LabeledPoint.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/LabeledPoint.scala @@ -23,6 +23,8 @@ import org.apache.spark.annotation.{Experimental, Since} import org.apache.spark.ml.linalg.Vector /** + * :: Experimental :: + * * Class that represents the features and labels of a data point. * * @param label Label for this data point. 
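Unlike the rest of this patch, ml.feature.LabeledPoint gains the Experimental tag here, since it is the new spark.ml counterpart of mllib's LabeledPoint. Constructing one is trivial (sketch, not part of the patch):

    import org.apache.spark.ml.feature.LabeledPoint
    import org.apache.spark.ml.linalg.Vectors

    val pos = LabeledPoint(1.0, Vectors.dense(0.0, 1.1, 0.1))
    val neg = LabeledPoint(0.0, Vectors.sparse(3, Array(0, 2), Array(2.0, -1.0)))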
diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/MinMaxScaler.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/MinMaxScaler.scala index d5ad5abced46..7b03f0c0f341 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/MinMaxScaler.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/MinMaxScaler.scala @@ -19,7 +19,7 @@ package org.apache.spark.ml.feature import org.apache.hadoop.fs.Path -import org.apache.spark.annotation.{Experimental, Since} +import org.apache.spark.annotation.Since import org.apache.spark.ml.{Estimator, Model} import org.apache.spark.ml.linalg.{Vector, Vectors, VectorUDT} import org.apache.spark.ml.param.{DoubleParam, ParamMap, Params} @@ -74,7 +74,6 @@ private[feature] trait MinMaxScalerParams extends Params with HasInputCol with H } /** - * :: Experimental :: * Rescale each feature individually to a common range [min, max] linearly using column summary * statistics, which is also known as min-max normalization or Rescaling. The rescaled value for * feature E is calculated as, @@ -85,7 +84,6 @@ private[feature] trait MinMaxScalerParams extends Params with HasInputCol with H * Note that since zero values will probably be transformed to non-zero values, output of the * transformer will be DenseVector even for sparse input. */ -@Experimental @Since("1.5.0") class MinMaxScaler @Since("1.5.0") (@Since("1.5.0") override val uid: String) extends Estimator[MinMaxScalerModel] with MinMaxScalerParams with DefaultParamsWritable { @@ -138,7 +136,6 @@ object MinMaxScaler extends DefaultParamsReadable[MinMaxScaler] { } /** - * :: Experimental :: * Model fitted by [[MinMaxScaler]]. * * @param originalMin min value for each original column during fitting @@ -146,7 +143,6 @@ object MinMaxScaler extends DefaultParamsReadable[MinMaxScaler] { * * TODO: The transformer does not yet set the metadata in the output column (SPARK-8529). */ -@Experimental @Since("1.5.0") class MinMaxScalerModel private[ml] ( @Since("1.5.0") override val uid: String, diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/NGram.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/NGram.scala index 9c1f1ad443bb..4463aea0097e 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/NGram.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/NGram.scala @@ -17,14 +17,13 @@ package org.apache.spark.ml.feature -import org.apache.spark.annotation.{Experimental, Since} +import org.apache.spark.annotation.Since import org.apache.spark.ml.UnaryTransformer import org.apache.spark.ml.param._ import org.apache.spark.ml.util._ import org.apache.spark.sql.types.{ArrayType, DataType, StringType} /** - * :: Experimental :: * A feature transformer that converts the input array of strings into an array of n-grams. Null * values in the input array are ignored. * It returns an array of n-grams where each n-gram is represented by a space-separated string of @@ -34,7 +33,6 @@ import org.apache.spark.sql.types.{ArrayType, DataType, StringType} * When the input array length is less than n (number of elements per n-gram), no n-grams are * returned. 
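A minimal NGram sketch (not part of the patch; assumes an active SparkSession `spark`):

    import org.apache.spark.ml.feature.NGram
    import spark.implicits._

    val df = Seq((0, Seq("hi", "i", "heard", "about", "spark"))).toDF("id", "words")
    val ngram = new NGram().setN(2).setInputCol("words").setOutputCol("ngrams")
    // Each bigram is a space-separated string: ["hi i", "i heard", ...]
    ngram.transform(df).select("ngrams").show(false)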
*/ -@Experimental @Since("1.5.0") class NGram @Since("1.5.0") (@Since("1.5.0") override val uid: String) extends UnaryTransformer[Seq[String], Seq[String], NGram] with DefaultParamsWritable { diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/Normalizer.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/Normalizer.scala index f9cbad90c9f3..eb0690058013 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/Normalizer.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/Normalizer.scala @@ -17,7 +17,7 @@ package org.apache.spark.ml.feature -import org.apache.spark.annotation.{Experimental, Since} +import org.apache.spark.annotation.Since import org.apache.spark.ml.UnaryTransformer import org.apache.spark.ml.linalg.{Vector, VectorUDT} import org.apache.spark.ml.param.{DoubleParam, ParamValidators} @@ -27,10 +27,8 @@ import org.apache.spark.mllib.linalg.{Vectors => OldVectors} import org.apache.spark.sql.types.DataType /** - * :: Experimental :: * Normalize a vector to have unit norm using the given p-norm. */ -@Experimental @Since("1.4.0") class Normalizer @Since("1.4.0") (@Since("1.4.0") override val uid: String) extends UnaryTransformer[Vector, Vector, Normalizer] with DefaultParamsWritable { diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/OneHotEncoder.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/OneHotEncoder.scala index 01828ede6bc6..8b04b5de6fd2 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/OneHotEncoder.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/OneHotEncoder.scala @@ -17,7 +17,7 @@ package org.apache.spark.ml.feature -import org.apache.spark.annotation.{Experimental, Since} +import org.apache.spark.annotation.Since import org.apache.spark.ml.Transformer import org.apache.spark.ml.attribute._ import org.apache.spark.ml.linalg.Vectors @@ -29,7 +29,6 @@ import org.apache.spark.sql.functions.{col, udf} import org.apache.spark.sql.types.{DoubleType, NumericType, StructType} /** - * :: Experimental :: * A one-hot encoder that maps a column of category indices to a column of binary vectors, with * at most a single one-value per row that indicates the input category index. * For example with 5 categories, an input value of 2.0 would map to an output vector of @@ -42,7 +41,6 @@ import org.apache.spark.sql.types.{DoubleType, NumericType, StructType} * * @see [[StringIndexer]] for converting categorical values into category indices */ -@Experimental @Since("1.4.0") class OneHotEncoder @Since("1.4.0") (@Since("1.4.0") override val uid: String) extends Transformer with HasInputCol with HasOutputCol with DefaultParamsWritable { diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/PCA.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/PCA.scala index ef8b08545db2..6b913480fdc2 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/PCA.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/PCA.scala @@ -19,7 +19,7 @@ package org.apache.spark.ml.feature import org.apache.hadoop.fs.Path -import org.apache.spark.annotation.{Experimental, Since} +import org.apache.spark.annotation.Since import org.apache.spark.ml._ import org.apache.spark.ml.linalg._ import org.apache.spark.ml.param._ @@ -59,12 +59,11 @@ private[feature] trait PCAParams extends Params with HasInputCol with HasOutputC } } + /** - * :: Experimental :: * PCA trains a model to project vectors to a lower dimensional space of the top [[PCA!.k]] * principal components. 
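A minimal PCA sketch (not part of the patch; assumes an active SparkSession `spark`, toy vectors for illustration):

    import org.apache.spark.ml.feature.PCA
    import org.apache.spark.ml.linalg.Vectors
    import spark.implicits._

    val df = Seq(
      Vectors.dense(2.0, 0.0, 3.0, 4.0, 5.0),
      Vectors.dense(4.0, 0.0, 0.0, 6.0, 7.0),
      Vectors.sparse(5, Array(1, 3), Array(1.0, 7.0))
    ).map(Tuple1.apply).toDF("features")

    val model = new PCA().setInputCol("features").setOutputCol("pcaFeatures").setK(2).fit(df)
    println(model.explainedVariance) // proportion of variance per principal component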
*/ -@Experimental @Since("1.5.0") class PCA @Since("1.5.0") ( @Since("1.5.0") override val uid: String) @@ -116,14 +115,12 @@ object PCA extends DefaultParamsReadable[PCA] { } /** - * :: Experimental :: * Model fitted by [[PCA]]. Transforms vectors to a lower dimensional space. * * @param pc A principal components Matrix. Each column is one principal component. * @param explainedVariance A vector of proportions of variance explained by * each principal component. */ -@Experimental @Since("1.5.0") class PCAModel private[ml] ( @Since("1.5.0") override val uid: String, diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/PolynomialExpansion.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/PolynomialExpansion.scala index 7b35fdeaf40c..72fb35bd79ad 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/PolynomialExpansion.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/PolynomialExpansion.scala @@ -19,7 +19,7 @@ package org.apache.spark.ml.feature import scala.collection.mutable -import org.apache.spark.annotation.{Experimental, Since} +import org.apache.spark.annotation.Since import org.apache.spark.ml.UnaryTransformer import org.apache.spark.ml.linalg._ import org.apache.spark.ml.param.{IntParam, ParamMap, ParamValidators} @@ -27,14 +27,12 @@ import org.apache.spark.ml.util._ import org.apache.spark.sql.types.DataType /** - * :: Experimental :: * Perform feature expansion in a polynomial space. As said in wikipedia of Polynomial Expansion, * which is available at [[http://en.wikipedia.org/wiki/Polynomial_expansion]], "In mathematics, an * expansion of a product of sums expresses it as a sum of products by using the fact that * multiplication distributes over addition". Take a 2-variable feature vector as an example: * `(x, y)`, if we want to expand it with degree 2, then we get `(x, x * x, y, x * y, y * y)`. */ -@Experimental @Since("1.4.0") class PolynomialExpansion @Since("1.4.0") (@Since("1.4.0") override val uid: String) extends UnaryTransformer[Vector, Vector, PolynomialExpansion] with DefaultParamsWritable { diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/QuantileDiscretizer.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/QuantileDiscretizer.scala index 96b8e7d9f7fa..9a636bd8a5e4 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/QuantileDiscretizer.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/QuantileDiscretizer.scala @@ -17,7 +17,7 @@ package org.apache.spark.ml.feature -import org.apache.spark.annotation.{Experimental, Since} +import org.apache.spark.annotation.Since import org.apache.spark.internal.Logging import org.apache.spark.ml._ import org.apache.spark.ml.attribute.NominalAttribute @@ -64,7 +64,6 @@ private[feature] trait QuantileDiscretizerBase extends Params } /** - * :: Experimental :: * `QuantileDiscretizer` takes a column with continuous features and outputs a column with binned * categorical features. The number of bins can be set using the `numBuckets` parameter. * The bin ranges are chosen using an approximate algorithm (see the documentation for @@ -73,7 +72,6 @@ private[feature] trait QuantileDiscretizerBase extends Params * `relativeError` parameter. The lower and upper bin bounds will be `-Infinity` and `+Infinity`, * covering all real values. 
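A minimal QuantileDiscretizer sketch (not part of the patch; assumes an active SparkSession `spark`):

    import org.apache.spark.ml.feature.QuantileDiscretizer
    import spark.implicits._

    val df = Seq((0, 18.0), (1, 19.0), (2, 8.0), (3, 5.0), (4, 2.2)).toDF("id", "hour")
    val discretizer = new QuantileDiscretizer()
      .setInputCol("hour")
      .setOutputCol("bucket")
      .setNumBuckets(3)
    val bucketizer = discretizer.fit(df) // fitting yields a Bucketizer, as documented above
    bucketizer.transform(df).show()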
*/ -@Experimental @Since("1.6.0") final class QuantileDiscretizer @Since("1.6.0") (@Since("1.6.0") override val uid: String) extends Estimator[Bucketizer] with QuantileDiscretizerBase with DefaultParamsWritable { diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/SQLTransformer.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/SQLTransformer.scala index b8715746fee5..289037640fd4 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/SQLTransformer.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/SQLTransformer.scala @@ -17,7 +17,7 @@ package org.apache.spark.ml.feature -import org.apache.spark.annotation.{Experimental, Since} +import org.apache.spark.annotation.Since import org.apache.spark.ml.param.{Param, ParamMap} import org.apache.spark.ml.Transformer import org.apache.spark.ml.util._ @@ -25,7 +25,6 @@ import org.apache.spark.sql.{DataFrame, Dataset, Row, SparkSession} import org.apache.spark.sql.types.StructType /** - * :: Experimental :: * Implements the transformations which are defined by SQL statement. * Currently we only support SQL syntax like 'SELECT ... FROM __THIS__ ...' * where '__THIS__' represents the underlying table of the input dataset. @@ -37,7 +36,6 @@ import org.apache.spark.sql.types.StructType * - SELECT a, SQRT(b) AS b_sqrt FROM __THIS__ where a > 5 * - SELECT a, b, SUM(c) AS c_sum FROM __THIS__ GROUP BY a, b */ -@Experimental @Since("1.6.0") class SQLTransformer @Since("1.6.0") (@Since("1.6.0") override val uid: String) extends Transformer with DefaultParamsWritable { diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/StandardScaler.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/StandardScaler.scala index b4be95494fd1..2494cf51a2bd 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/StandardScaler.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/StandardScaler.scala @@ -19,7 +19,7 @@ package org.apache.spark.ml.feature import org.apache.hadoop.fs.Path -import org.apache.spark.annotation.{Experimental, Since} +import org.apache.spark.annotation.Since import org.apache.spark.ml._ import org.apache.spark.ml.linalg.{Vector, VectorUDT} import org.apache.spark.ml.param._ @@ -76,7 +76,6 @@ private[feature] trait StandardScalerParams extends Params with HasInputCol with } /** - * :: Experimental :: * Standardizes features by removing the mean and scaling to unit variance using column summary * statistics on the samples in the training set. * @@ -85,7 +84,6 @@ private[feature] trait StandardScalerParams extends Params with HasInputCol with * corrected sample standard deviation]], * which is computed as the square root of the unbiased sample variance. */ -@Experimental @Since("1.2.0") class StandardScaler @Since("1.4.0") ( @Since("1.4.0") override val uid: String) @@ -138,13 +136,11 @@ object StandardScaler extends DefaultParamsReadable[StandardScaler] { } /** - * :: Experimental :: * Model fitted by [[StandardScaler]]. 
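A minimal StandardScaler sketch (not part of the patch; the `df` DataFrame with a Vector "features" column is an assumption):

    import org.apache.spark.ml.feature.StandardScaler

    val scaler = new StandardScaler()
      .setInputCol("features")
      .setOutputCol("scaledFeatures")
      .setWithStd(true)   // divide by the corrected sample std dev (the default)
      .setWithMean(false) // centering densifies sparse vectors, so off by default
    val scalerModel = scaler.fit(df)
    val scaled = scalerModel.transform(df)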
* * @param std Standard deviation of the StandardScalerModel * @param mean Mean of the StandardScalerModel */ -@Experimental @Since("1.2.0") class StandardScalerModel private[ml] ( @Since("1.4.0") override val uid: String, diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/StopWordsRemover.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/StopWordsRemover.scala index 1a6f42f773cd..666070037cdd 100755 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/StopWordsRemover.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/StopWordsRemover.scala @@ -17,7 +17,7 @@ package org.apache.spark.ml.feature -import org.apache.spark.annotation.{Experimental, Since} +import org.apache.spark.annotation.Since import org.apache.spark.ml.Transformer import org.apache.spark.ml.param.{BooleanParam, ParamMap, StringArrayParam} import org.apache.spark.ml.param.shared.{HasInputCol, HasOutputCol} @@ -27,12 +27,10 @@ import org.apache.spark.sql.functions.{col, udf} import org.apache.spark.sql.types.{ArrayType, StringType, StructType} /** - * :: Experimental :: * A feature transformer that filters out stop words from input. * Note: null values from input array are preserved unless adding null to stopWords explicitly. * @see [[http://en.wikipedia.org/wiki/Stop_words]] */ -@Experimental @Since("1.5.0") class StopWordsRemover @Since("1.5.0") (@Since("1.5.0") override val uid: String) extends Transformer with HasInputCol with HasOutputCol with DefaultParamsWritable { diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/StringIndexer.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/StringIndexer.scala index 028e540fe535..fe79e2ec808a 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/StringIndexer.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/StringIndexer.scala @@ -20,7 +20,7 @@ package org.apache.spark.ml.feature import org.apache.hadoop.fs.Path import org.apache.spark.SparkException -import org.apache.spark.annotation.{Experimental, Since} +import org.apache.spark.annotation.Since import org.apache.spark.ml.{Estimator, Model, Transformer} import org.apache.spark.ml.attribute.{Attribute, NominalAttribute} import org.apache.spark.ml.param._ @@ -55,7 +55,6 @@ private[feature] trait StringIndexerBase extends Params with HasInputCol with Ha } /** - * :: Experimental :: * A label indexer that maps a string column of labels to an ML column of label indices. * If the input column is numeric, we cast it to string and index the string values. * The indices are in [0, numLabels), ordered by label frequencies. @@ -63,7 +62,6 @@ private[feature] trait StringIndexerBase extends Params with HasInputCol with Ha * * @see [[IndexToString]] for the inverse transformation */ -@Experimental @Since("1.4.0") class StringIndexer @Since("1.4.0") ( @Since("1.4.0") override val uid: String) extends Estimator[StringIndexerModel] @@ -112,7 +110,6 @@ object StringIndexer extends DefaultParamsReadable[StringIndexer] { } /** - * :: Experimental :: * Model fitted by [[StringIndexer]]. * * NOTE: During transformation, if the input column does not exist, @@ -121,7 +118,6 @@ object StringIndexer extends DefaultParamsReadable[StringIndexer] { * * @param labels Ordered list of labels, corresponding to indices to be assigned. 
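A minimal sketch of the StringIndexer/IndexToString round trip (not part of the patch; assumes an active SparkSession `spark`):

    import org.apache.spark.ml.feature.{IndexToString, StringIndexer}
    import spark.implicits._

    val df = Seq((0, "a"), (1, "b"), (2, "c"), (3, "a"), (4, "a")).toDF("id", "category")
    val indexerModel = new StringIndexer()
      .setInputCol("category")
      .setOutputCol("categoryIndex")
      .fit(df) // "a" is most frequent, so it gets index 0.0
    val indexed = indexerModel.transform(df)

    // IndexToString reads the labels back from the column's ML attributes.
    new IndexToString()
      .setInputCol("categoryIndex")
      .setOutputCol("originalCategory")
      .transform(indexed)
      .show()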
*/ -@Experimental @Since("1.4.0") class StringIndexerModel ( @Since("1.4.0") override val uid: String, @@ -250,7 +246,6 @@ object StringIndexerModel extends MLReadable[StringIndexerModel] { } /** - * :: Experimental :: * A [[Transformer]] that maps a column of indices back to a new column of corresponding * string values. * The index-string mapping is either from the ML attributes of the input column, @@ -258,7 +253,6 @@ object StringIndexerModel extends MLReadable[StringIndexerModel] { * * @see [[StringIndexer]] for converting strings into indices */ -@Experimental @Since("1.5.0") class IndexToString private[ml] (@Since("1.5.0") override val uid: String) extends Transformer with HasInputCol with HasOutputCol with DefaultParamsWritable { diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/Tokenizer.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/Tokenizer.scala index 010c948749f3..45d8fa94a8f8 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/Tokenizer.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/Tokenizer.scala @@ -17,19 +17,17 @@ package org.apache.spark.ml.feature -import org.apache.spark.annotation.{Experimental, Since} +import org.apache.spark.annotation.Since import org.apache.spark.ml.UnaryTransformer import org.apache.spark.ml.param._ import org.apache.spark.ml.util._ import org.apache.spark.sql.types.{ArrayType, DataType, StringType} /** - * :: Experimental :: * A tokenizer that converts the input string to lowercase and then splits it by white spaces. * * @see [[RegexTokenizer]] */ -@Experimental @Since("1.2.0") class Tokenizer @Since("1.4.0") (@Since("1.4.0") override val uid: String) extends UnaryTransformer[String, Seq[String], Tokenizer] with DefaultParamsWritable { @@ -59,13 +57,11 @@ object Tokenizer extends DefaultParamsReadable[Tokenizer] { } /** - * :: Experimental :: * A regex based tokenizer that extracts tokens either by using the provided regex pattern to split * the text (default) or repeatedly matching the regex (if `gaps` is false). * Optional parameters also allow filtering tokens using a minimal length. * It returns an array of strings that can be empty. */ -@Experimental @Since("1.4.0") class RegexTokenizer @Since("1.4.0") (@Since("1.4.0") override val uid: String) extends UnaryTransformer[String, Seq[String], RegexTokenizer] with DefaultParamsWritable { diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/VectorAssembler.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/VectorAssembler.scala index 4939dabd987e..142a2ae44c69 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/VectorAssembler.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/VectorAssembler.scala @@ -20,7 +20,7 @@ package org.apache.spark.ml.feature import scala.collection.mutable.ArrayBuilder import org.apache.spark.SparkException -import org.apache.spark.annotation.{Experimental, Since} +import org.apache.spark.annotation.Since import org.apache.spark.ml.Transformer import org.apache.spark.ml.attribute.{Attribute, AttributeGroup, NumericAttribute, UnresolvedAttribute} import org.apache.spark.ml.linalg.{Vector, Vectors, VectorUDT} @@ -32,10 +32,8 @@ import org.apache.spark.sql.functions._ import org.apache.spark.sql.types._ /** - * :: Experimental :: * A feature transformer that merges multiple columns into a vector column. 
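A minimal VectorAssembler sketch (not part of the patch; assumes an active SparkSession `spark`):

    import org.apache.spark.ml.feature.VectorAssembler
    import spark.implicits._

    val df = Seq((18.0, 1.0, 0.5)).toDF("age", "clicks", "score")
    val assembler = new VectorAssembler()
      .setInputCols(Array("age", "clicks", "score"))
      .setOutputCol("features")
    assembler.transform(df).select("features").show(false) // [18.0,1.0,0.5]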
*/ -@Experimental @Since("1.4.0") class VectorAssembler @Since("1.4.0") (@Since("1.4.0") override val uid: String) extends Transformer with HasInputCols with HasOutputCol with DefaultParamsWritable { diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/VectorIndexer.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/VectorIndexer.scala index 5656a9f979fc..d1a5c2e82581 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/VectorIndexer.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/VectorIndexer.scala @@ -24,7 +24,7 @@ import scala.collection.JavaConverters._ import org.apache.hadoop.fs.Path -import org.apache.spark.annotation.{Experimental, Since} +import org.apache.spark.annotation.Since import org.apache.spark.ml.{Estimator, Model} import org.apache.spark.ml.attribute._ import org.apache.spark.ml.linalg.{DenseVector, SparseVector, Vector, VectorUDT} @@ -59,7 +59,6 @@ private[ml] trait VectorIndexerParams extends Params with HasInputCol with HasOu } /** - * :: Experimental :: * Class for indexing categorical feature columns in a dataset of [[Vector]]. * * This has 2 usage modes: @@ -93,7 +92,6 @@ private[ml] trait VectorIndexerParams extends Params with HasInputCol with HasOu * - Add warning if a categorical feature has only 1 category. * - Add option for allowing unknown categories. */ -@Experimental @Since("1.4.0") class VectorIndexer @Since("1.4.0") ( @Since("1.4.0") override val uid: String) @@ -247,7 +245,6 @@ object VectorIndexer extends DefaultParamsReadable[VectorIndexer] { } /** - * :: Experimental :: * Model fitted by [[VectorIndexer]]. Transform categorical features to use 0-based indices * instead of their original values. * - Categorical features are mapped to indices. @@ -263,7 +260,6 @@ object VectorIndexer extends DefaultParamsReadable[VectorIndexer] { * Values are maps from original features values to 0-based category indices. * If a feature is not in this map, it is treated as continuous. */ -@Experimental @Since("1.4.0") class VectorIndexerModel private[ml] ( @Since("1.4.0") override val uid: String, diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/VectorSlicer.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/VectorSlicer.scala index 6769e490c51c..966ccb85d0e0 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/VectorSlicer.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/VectorSlicer.scala @@ -17,7 +17,7 @@ package org.apache.spark.ml.feature -import org.apache.spark.annotation.{Experimental, Since} +import org.apache.spark.annotation.Since import org.apache.spark.ml.Transformer import org.apache.spark.ml.attribute.{Attribute, AttributeGroup} import org.apache.spark.ml.linalg._ @@ -29,7 +29,6 @@ import org.apache.spark.sql.functions._ import org.apache.spark.sql.types.StructType /** - * :: Experimental :: * This class takes a feature vector and outputs a new feature vector with a subarray of the * original features. * @@ -40,7 +39,6 @@ import org.apache.spark.sql.types.StructType * The output vector will order features with the selected indices first (in the order given), * followed by the selected names (in the order given). 
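A minimal VectorSlicer sketch using integer indices (not part of the patch; assumes an active SparkSession `spark` — name-based slicing would additionally require ML attributes on the input column):

    import org.apache.spark.ml.feature.VectorSlicer
    import org.apache.spark.ml.linalg.Vectors
    import spark.implicits._

    val df = Seq(Tuple1(Vectors.dense(-2.0, 2.3, 0.0))).toDF("userFeatures")
    val slicer = new VectorSlicer()
      .setInputCol("userFeatures")
      .setOutputCol("features")
      .setIndices(Array(1, 2)) // keep the second and third features, in that order
    slicer.transform(df).show(false)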
*/ -@Experimental @Since("1.5.0") final class VectorSlicer @Since("1.5.0") (@Since("1.5.0") override val uid: String) extends Transformer with HasInputCol with HasOutputCol with DefaultParamsWritable { diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/Word2Vec.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/Word2Vec.scala index 0cac3fa2d7e5..c2b434c3d5cb 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/Word2Vec.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/Word2Vec.scala @@ -19,8 +19,7 @@ package org.apache.spark.ml.feature import org.apache.hadoop.fs.Path -import org.apache.spark.SparkContext -import org.apache.spark.annotation.{Experimental, Since} +import org.apache.spark.annotation.Since import org.apache.spark.ml.{Estimator, Model} import org.apache.spark.ml.linalg.{BLAS, Vector, Vectors, VectorUDT} import org.apache.spark.ml.param._ @@ -115,11 +114,9 @@ private[feature] trait Word2VecBase extends Params } /** - * :: Experimental :: * Word2Vec trains a model of `Map(String, Vector)`, i.e. transforms a word into a code for further * natural language processing or machine learning process. */ -@Experimental @Since("1.4.0") final class Word2Vec @Since("1.4.0") ( @Since("1.4.0") override val uid: String) @@ -202,10 +199,8 @@ object Word2Vec extends DefaultParamsReadable[Word2Vec] { } /** - * :: Experimental :: * Model fitted by [[Word2Vec]]. */ -@Experimental @Since("1.4.0") class Word2VecModel private[ml] ( @Since("1.4.0") override val uid: String, diff --git a/mllib/src/main/scala/org/apache/spark/ml/param/params.scala b/mllib/src/main/scala/org/apache/spark/ml/param/params.scala index ecec61a72f82..e7780cf1c39f 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/param/params.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/param/params.scala @@ -28,9 +28,9 @@ import scala.collection.JavaConverters._ import org.json4s._ import org.json4s.jackson.JsonMethods._ -import org.apache.spark.annotation.{DeveloperApi, Experimental, Since} -import org.apache.spark.ml.linalg.{Vector, Vectors} +import org.apache.spark.annotation.{DeveloperApi, Since} import org.apache.spark.ml.linalg.JsonVectorConverter +import org.apache.spark.ml.linalg.Vector import org.apache.spark.ml.util.Identifiable /** @@ -510,11 +510,9 @@ class IntArrayParam(parent: Params, name: String, doc: String, isValid: Array[In } /** - * :: Experimental :: * A param and its value. */ @Since("1.2.0") -@Experimental case class ParamPair[T] @Since("1.2.0") ( @Since("1.2.0") param: Param[T], @Since("1.2.0") value: T) { @@ -797,11 +795,9 @@ trait Params extends Identifiable with Serializable { abstract class JavaParams extends Params /** - * :: Experimental :: * A param to value map. 
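A minimal ParamMap sketch, overriding params at fit time rather than on the estimator itself (not part of the patch; the `training` DataFrame is an assumption):

    import org.apache.spark.ml.classification.LogisticRegression
    import org.apache.spark.ml.param.ParamMap

    val lr = new LogisticRegression()
    val paramMap = ParamMap(lr.maxIter -> 20, lr.regParam -> 0.1) ++
      ParamMap(lr.probabilityCol -> "myProbability") // maps combine with ++
    val model = lr.fit(training, paramMap) // these values win over lr's own settings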
*/ @Since("1.2.0") -@Experimental final class ParamMap private[ml] (private val map: mutable.Map[Param[Any], Any]) extends Serializable { @@ -952,7 +948,6 @@ final class ParamMap private[ml] (private val map: mutable.Map[Param[Any], Any]) } @Since("1.2.0") -@Experimental object ParamMap { /** diff --git a/mllib/src/main/scala/org/apache/spark/ml/recommendation/ALS.scala b/mllib/src/main/scala/org/apache/spark/ml/recommendation/ALS.scala index 5dc2433e55c3..a2c4c2691190 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/recommendation/ALS.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/recommendation/ALS.scala @@ -26,12 +26,12 @@ import scala.util.{Sorting, Try} import scala.util.hashing.byteswap64 import com.github.fommil.netlib.BLAS.{getInstance => blas} -import org.apache.hadoop.fs.{FileSystem, Path} +import org.apache.hadoop.fs.Path import org.json4s.DefaultFormats import org.json4s.JsonDSL._ import org.apache.spark.{Dependency, Partitioner, ShuffleDependency, SparkContext} -import org.apache.spark.annotation.{DeveloperApi, Experimental, Since} +import org.apache.spark.annotation.{DeveloperApi, Since} import org.apache.spark.internal.Logging import org.apache.spark.ml.{Estimator, Model} import org.apache.spark.ml.param._ @@ -222,14 +222,12 @@ private[recommendation] trait ALSParams extends ALSModelParams with HasMaxIter w } /** - * :: Experimental :: * Model fitted by ALS. * * @param rank rank of the matrix factorization model * @param userFactors a DataFrame that stores user factors in two columns: `id` and `features` * @param itemFactors a DataFrame that stores item factors in two columns: `id` and `features` */ -@Experimental @Since("1.3.0") class ALSModel private[ml] ( @Since("1.4.0") override val uid: String, @@ -333,7 +331,6 @@ object ALSModel extends MLReadable[ALSModel] { } /** - * :: Experimental :: * Alternating Least Squares (ALS) matrix factorization. * * ALS attempts to estimate the ratings matrix `R` as the product of two lower-rank matrices, @@ -362,7 +359,6 @@ object ALSModel extends MLReadable[ALSModel] { * indicated user * preferences rather than explicit ratings given to items. */ -@Experimental @Since("1.3.0") class ALS(@Since("1.4.0") override val uid: String) extends Estimator[ALSModel] with ALSParams with DefaultParamsWritable { diff --git a/mllib/src/main/scala/org/apache/spark/ml/regression/DecisionTreeRegressor.scala b/mllib/src/main/scala/org/apache/spark/ml/regression/DecisionTreeRegressor.scala index 7ff6d0afd55c..ebc6c12ddcf9 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/regression/DecisionTreeRegressor.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/regression/DecisionTreeRegressor.scala @@ -21,7 +21,7 @@ import org.apache.hadoop.fs.Path import org.json4s.{DefaultFormats, JObject} import org.json4s.JsonDSL._ -import org.apache.spark.annotation.{Experimental, Since} +import org.apache.spark.annotation.Since import org.apache.spark.ml.{PredictionModel, Predictor} import org.apache.spark.ml.feature.LabeledPoint import org.apache.spark.ml.linalg.Vector @@ -38,13 +38,11 @@ import org.apache.spark.sql.functions._ /** - * :: Experimental :: * [[http://en.wikipedia.org/wiki/Decision_tree_learning Decision tree]] learning algorithm * for regression. * It supports both continuous and categorical features. 
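ALS and ALSModel also leave Experimental in this hunk set; a minimal fitting sketch (not part of the patch; assumes an active SparkSession `spark`, toy ratings for illustration):

    import org.apache.spark.ml.recommendation.ALS
    import spark.implicits._

    val ratings = Seq((0, 0, 4.0), (0, 1, 2.0), (1, 0, 4.0), (1, 1, 3.0))
      .toDF("userId", "itemId", "rating")
    val als = new ALS()
      .setUserCol("userId").setItemCol("itemId").setRatingCol("rating")
      .setRank(5).setMaxIter(5).setRegParam(0.1)
    val model = als.fit(ratings)
    model.transform(ratings).show() // adds a "prediction" column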
*/ @Since("1.4.0") -@Experimental class DecisionTreeRegressor @Since("1.4.0") (@Since("1.4.0") override val uid: String) extends Predictor[Vector, DecisionTreeRegressor, DecisionTreeRegressionModel] with DecisionTreeRegressorParams with DefaultParamsWritable { @@ -125,7 +123,6 @@ class DecisionTreeRegressor @Since("1.4.0") (@Since("1.4.0") override val uid: S } @Since("1.4.0") -@Experimental object DecisionTreeRegressor extends DefaultParamsReadable[DecisionTreeRegressor] { /** Accessor for supported impurities: variance */ final val supportedImpurities: Array[String] = TreeRegressorParams.supportedImpurities @@ -135,13 +132,11 @@ object DecisionTreeRegressor extends DefaultParamsReadable[DecisionTreeRegressor } /** - * :: Experimental :: * [[http://en.wikipedia.org/wiki/Decision_tree_learning Decision tree]] model for regression. * It supports both continuous and categorical features. * @param rootNode Root of the decision tree */ @Since("1.4.0") -@Experimental class DecisionTreeRegressionModel private[ml] ( override val uid: String, override val rootNode: Node, diff --git a/mllib/src/main/scala/org/apache/spark/ml/regression/GBTRegressor.scala b/mllib/src/main/scala/org/apache/spark/ml/regression/GBTRegressor.scala index 6223555504d7..ce355938ec1c 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/regression/GBTRegressor.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/regression/GBTRegressor.scala @@ -38,7 +38,6 @@ import org.apache.spark.sql.{DataFrame, Dataset} import org.apache.spark.sql.functions._ /** - * :: Experimental :: * [[http://en.wikipedia.org/wiki/Gradient_boosting Gradient-Boosted Trees (GBTs)]] * learning algorithm for regression. * It supports both continuous and categorical features. @@ -56,7 +55,6 @@ import org.apache.spark.sql.functions._ * [https://issues.apache.org/jira/browse/SPARK-4240] */ @Since("1.4.0") -@Experimental class GBTRegressor @Since("1.4.0") (@Since("1.4.0") override val uid: String) extends Predictor[Vector, GBTRegressor, GBTRegressionModel] with GBTRegressorParams with DefaultParamsWritable with Logging { @@ -135,7 +133,6 @@ class GBTRegressor @Since("1.4.0") (@Since("1.4.0") override val uid: String) } @Since("1.4.0") -@Experimental object GBTRegressor extends DefaultParamsReadable[GBTRegressor] { /** Accessor for supported loss settings: squared (L2), absolute (L1) */ @@ -147,8 +144,6 @@ object GBTRegressor extends DefaultParamsReadable[GBTRegressor] { } /** - * :: Experimental :: - * * [[http://en.wikipedia.org/wiki/Gradient_boosting Gradient-Boosted Trees (GBTs)]] * model for regression. * It supports both continuous and categorical features. @@ -156,7 +151,6 @@ object GBTRegressor extends DefaultParamsReadable[GBTRegressor] { * @param _treeWeights Weights for the decision trees in the ensemble. 
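A minimal GBTRegressor sketch (not part of the patch; the `training` DataFrame with "label" and Vector "features" columns is an assumption):

    import org.apache.spark.ml.regression.GBTRegressor

    val gbt = new GBTRegressor()
      .setMaxIter(10)         // number of boosting iterations, i.e. trees
      .setMaxDepth(3)
      .setLossType("squared") // "squared" (L2) or "absolute" (L1), per the object above
    val model = gbt.fit(training)
    println(s"trees=${model.trees.length} weights=${model.treeWeights.mkString(",")}")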
*/ @Since("1.4.0") -@Experimental class GBTRegressionModel private[ml]( override val uid: String, private val _trees: Array[DecisionTreeRegressionModel], diff --git a/mllib/src/main/scala/org/apache/spark/ml/regression/IsotonicRegression.scala b/mllib/src/main/scala/org/apache/spark/ml/regression/IsotonicRegression.scala index 9b9429a328d0..35396446edc1 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/regression/IsotonicRegression.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/regression/IsotonicRegression.scala @@ -19,7 +19,7 @@ package org.apache.spark.ml.regression import org.apache.hadoop.fs.Path -import org.apache.spark.annotation.{Experimental, Since} +import org.apache.spark.annotation.Since import org.apache.spark.internal.Logging import org.apache.spark.ml.{Estimator, Model} import org.apache.spark.ml.linalg.{Vector, Vectors, VectorUDT} @@ -120,7 +120,6 @@ private[regression] trait IsotonicRegressionBase extends Params with HasFeatures } /** - * :: Experimental :: * Isotonic regression. * * Currently implemented using parallelized pool adjacent violators algorithm. @@ -129,7 +128,6 @@ private[regression] trait IsotonicRegressionBase extends Params with HasFeatures * Uses [[org.apache.spark.mllib.regression.IsotonicRegression]]. */ @Since("1.5.0") -@Experimental class IsotonicRegression @Since("1.5.0") (@Since("1.5.0") override val uid: String) extends Estimator[IsotonicRegressionModel] with IsotonicRegressionBase with DefaultParamsWritable { @@ -192,7 +190,6 @@ object IsotonicRegression extends DefaultParamsReadable[IsotonicRegression] { } /** - * :: Experimental :: * Model fitted by IsotonicRegression. * Predicts using a piecewise linear function. * @@ -202,7 +199,6 @@ object IsotonicRegression extends DefaultParamsReadable[IsotonicRegression] { * model trained by [[org.apache.spark.mllib.regression.IsotonicRegression]]. */ @Since("1.5.0") -@Experimental class IsotonicRegressionModel private[ml] ( override val uid: String, private val oldModel: MLlibIsotonicRegressionModel) diff --git a/mllib/src/main/scala/org/apache/spark/ml/regression/LinearRegression.scala b/mllib/src/main/scala/org/apache/spark/ml/regression/LinearRegression.scala index 0a4d98cab64a..e58c31138c0e 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/regression/LinearRegression.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/regression/LinearRegression.scala @@ -54,7 +54,6 @@ private[regression] trait LinearRegressionParams extends PredictorParams with HasFitIntercept with HasStandardization with HasWeightCol with HasSolver /** - * :: Experimental :: * Linear regression. * * The learning objective is to minimize the squared error, with regularization. @@ -68,7 +67,6 @@ private[regression] trait LinearRegressionParams extends PredictorParams * - L2 + L1 (elastic net) */ @Since("1.3.0") -@Experimental class LinearRegression @Since("1.3.0") (@Since("1.3.0") override val uid: String) extends Regressor[Vector, LinearRegression, LinearRegressionModel] with LinearRegressionParams with DefaultParamsWritable with Logging { @@ -382,11 +380,9 @@ object LinearRegression extends DefaultParamsReadable[LinearRegression] { } /** - * :: Experimental :: * Model produced by [[LinearRegression]]. 
*/ @Since("1.3.0") -@Experimental class LinearRegressionModel private[ml] ( @Since("1.4.0") override val uid: String, @Since("2.0.0") val coefficients: Vector, diff --git a/mllib/src/main/scala/org/apache/spark/ml/regression/RandomForestRegressor.scala b/mllib/src/main/scala/org/apache/spark/ml/regression/RandomForestRegressor.scala index 4f4d3d27841d..0ad00aa6f928 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/regression/RandomForestRegressor.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/regression/RandomForestRegressor.scala @@ -20,7 +20,7 @@ package org.apache.spark.ml.regression import org.json4s.{DefaultFormats, JObject} import org.json4s.JsonDSL._ -import org.apache.spark.annotation.{Experimental, Since} +import org.apache.spark.annotation.Since import org.apache.spark.ml.{PredictionModel, Predictor} import org.apache.spark.ml.feature.LabeledPoint import org.apache.spark.ml.linalg.Vector @@ -37,12 +37,10 @@ import org.apache.spark.sql.functions._ /** - * :: Experimental :: * [[http://en.wikipedia.org/wiki/Random_forest Random Forest]] learning algorithm for regression. * It supports both continuous and categorical features. */ @Since("1.4.0") -@Experimental class RandomForestRegressor @Since("1.4.0") (@Since("1.4.0") override val uid: String) extends Predictor[Vector, RandomForestRegressor, RandomForestRegressionModel] with RandomForestRegressorParams with DefaultParamsWritable { @@ -118,7 +116,6 @@ class RandomForestRegressor @Since("1.4.0") (@Since("1.4.0") override val uid: S } @Since("1.4.0") -@Experimental object RandomForestRegressor extends DefaultParamsReadable[RandomForestRegressor]{ /** Accessor for supported impurity settings: variance */ @Since("1.4.0") @@ -135,7 +132,6 @@ object RandomForestRegressor extends DefaultParamsReadable[RandomForestRegressor } /** - * :: Experimental :: * [[http://en.wikipedia.org/wiki/Random_forest Random Forest]] model for regression. * It supports both continuous and categorical features. * @@ -143,7 +139,6 @@ object RandomForestRegressor extends DefaultParamsReadable[RandomForestRegressor * @param numFeatures Number of features used by this model */ @Since("1.4.0") -@Experimental class RandomForestRegressionModel private[ml] ( override val uid: String, private val _trees: Array[DecisionTreeRegressionModel], diff --git a/mllib/src/main/scala/org/apache/spark/ml/tree/Node.scala b/mllib/src/main/scala/org/apache/spark/ml/tree/Node.scala index d5e5c454605b..8144bcb7d46f 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/tree/Node.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/tree/Node.scala @@ -17,17 +17,14 @@ package org.apache.spark.ml.tree -import org.apache.spark.annotation.DeveloperApi import org.apache.spark.ml.linalg.Vector import org.apache.spark.mllib.tree.impurity.ImpurityCalculator import org.apache.spark.mllib.tree.model.{ImpurityStats, InformationGainStats => OldInformationGainStats, Node => OldNode, Predict => OldPredict} /** - * :: DeveloperApi :: * Decision tree node interface. */ -@DeveloperApi sealed abstract class Node extends Serializable { // TODO: Add aggregate stats (once available). This will happen after we move the DecisionTree @@ -109,12 +106,10 @@ private[ml] object Node { } /** - * :: DeveloperApi :: * Decision tree leaf node. 
* @param prediction Prediction this node makes * @param impurity Impurity measure at this node (for training data) */ -@DeveloperApi class LeafNode private[ml] ( override val prediction: Double, override val impurity: Double, @@ -147,7 +142,6 @@ class LeafNode private[ml] ( } /** - * :: DeveloperApi :: * Internal Decision Tree node. * @param prediction Prediction this node would make if it were a leaf node * @param impurity Impurity measure at this node (for training data) @@ -157,7 +151,6 @@ class LeafNode private[ml] ( * @param rightChild Right-hand child node * @param split Information about the test used to split to the left or right child. */ -@DeveloperApi class InternalNode private[ml] ( override val prediction: Double, override val impurity: Double, @@ -167,6 +160,9 @@ class InternalNode private[ml] ( val split: Split, override private[ml] val impurityStats: ImpurityCalculator) extends Node { + // Note to developers: The constructor argument impurityStats should be reconsidered before we + // make the constructor public. We may be able to improve the representation. + override def toString: String = { s"InternalNode(prediction = $prediction, impurity = $impurity, split = $split)" } diff --git a/mllib/src/main/scala/org/apache/spark/ml/tree/Split.scala b/mllib/src/main/scala/org/apache/spark/ml/tree/Split.scala index 9704e15cd838..47fe3524f229 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/tree/Split.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/tree/Split.scala @@ -19,18 +19,16 @@ package org.apache.spark.ml.tree import java.util.Objects -import org.apache.spark.annotation.{DeveloperApi, Since} +import org.apache.spark.annotation.Since import org.apache.spark.ml.linalg.Vector import org.apache.spark.mllib.tree.configuration.{FeatureType => OldFeatureType} import org.apache.spark.mllib.tree.model.{Split => OldSplit} /** - * :: DeveloperApi :: * Interface for a "Split," which specifies a test made at a decision tree node * to choose the left or right path. */ -@DeveloperApi sealed trait Split extends Serializable { /** Index of feature which this split tests */ @@ -67,14 +65,12 @@ private[tree] object Split { } /** - * :: DeveloperApi :: * Split which tests a categorical feature. * @param featureIndex Index of the feature to test * @param _leftCategories If the feature value is in this set of categories, then the split goes * left. Otherwise, it goes right. * @param numCategories Number of categories for this feature. */ -@DeveloperApi class CategoricalSplit private[ml] ( override val featureIndex: Int, _leftCategories: Array[Double], @@ -153,13 +149,11 @@ class CategoricalSplit private[ml] ( } /** - * :: DeveloperApi :: * Split which tests a continuous feature. * @param featureIndex Index of the feature to test * @param threshold If the feature value is <= this threshold, then the split goes left. * Otherwise, it goes right. 
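With the `DeveloperApi` markers gone, the sealed `Node` and `Split` hierarchies are part of the stable surface, so a fitted tree can be inspected by plain pattern matching. A sketch, again reusing the assumed `training` DataFrame:

```scala
import org.apache.spark.ml.regression.RandomForestRegressor
import org.apache.spark.ml.tree.{CategoricalSplit, ContinuousSplit, InternalNode, LeafNode, Node}

val rfModel = new RandomForestRegressor().setNumTrees(3).setMaxDepth(4).fit(training)

// Recursively print one tree, matching on the two concrete Node subclasses.
def describe(node: Node, indent: String = ""): Unit = node match {
  case leaf: LeafNode =>
    println(s"${indent}predict ${leaf.prediction} (impurity ${leaf.impurity})")
  case internal: InternalNode =>
    internal.split match {
      case s: ContinuousSplit =>
        println(s"${indent}feature ${s.featureIndex} <= ${s.threshold}?")
      case s: CategoricalSplit =>
        println(s"${indent}feature ${s.featureIndex} in ${s.leftCategories.mkString("{", ",", "}")}?")
    }
    describe(internal.leftChild, indent + "  ")
    describe(internal.rightChild, indent + "  ")
}

describe(rfModel.trees.head.rootNode)
```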
*/ -@DeveloperApi class ContinuousSplit private[ml] (override val featureIndex: Int, val threshold: Double) extends Split { diff --git a/mllib/src/main/scala/org/apache/spark/ml/tuning/CrossValidator.scala b/mllib/src/main/scala/org/apache/spark/ml/tuning/CrossValidator.scala index 7d42da4a2ffa..520557849b9e 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/tuning/CrossValidator.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/tuning/CrossValidator.scala @@ -25,7 +25,7 @@ import com.github.fommil.netlib.F2jBLAS import org.apache.hadoop.fs.Path import org.json4s.DefaultFormats -import org.apache.spark.annotation.{Experimental, Since} +import org.apache.spark.annotation.Since import org.apache.spark.internal.Logging import org.apache.spark.ml._ import org.apache.spark.ml.evaluation.Evaluator @@ -55,11 +55,9 @@ private[ml] trait CrossValidatorParams extends ValidatorParams { } /** - * :: Experimental :: * K-fold cross validation. */ @Since("1.2.0") -@Experimental class CrossValidator @Since("1.2.0") (@Since("1.4.0") override val uid: String) extends Estimator[CrossValidatorModel] with CrossValidatorParams with MLWritable with Logging { @@ -190,7 +188,6 @@ object CrossValidator extends MLReadable[CrossValidator] { } /** - * :: Experimental :: * Model from k-fold cross validation. * * @param bestModel The best model selected from k-fold cross validation. @@ -198,7 +195,6 @@ object CrossValidator extends MLReadable[CrossValidator] { * [[CrossValidator.estimatorParamMaps]], in the corresponding order. */ @Since("1.2.0") -@Experimental class CrossValidatorModel private[ml] ( @Since("1.4.0") override val uid: String, @Since("1.2.0") val bestModel: Model[_], diff --git a/mllib/src/main/scala/org/apache/spark/ml/tuning/ParamGridBuilder.scala b/mllib/src/main/scala/org/apache/spark/ml/tuning/ParamGridBuilder.scala index 7d12f447f796..d369e7a61cdc 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/tuning/ParamGridBuilder.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/tuning/ParamGridBuilder.scala @@ -20,15 +20,13 @@ package org.apache.spark.ml.tuning import scala.annotation.varargs import scala.collection.mutable -import org.apache.spark.annotation.{Experimental, Since} +import org.apache.spark.annotation.Since import org.apache.spark.ml.param._ /** - * :: Experimental :: * Builder for a param grid used in grid search-based model selection. */ @Since("1.2.0") -@Experimental class ParamGridBuilder @Since("1.2.0") { private val paramGrid = mutable.Map.empty[Param[_], Iterable[_]] diff --git a/mllib/src/main/scala/org/apache/spark/ml/tuning/TrainValidationSplit.scala b/mllib/src/main/scala/org/apache/spark/ml/tuning/TrainValidationSplit.scala index f6f2bad401a1..0fdba1cb8814 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/tuning/TrainValidationSplit.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/tuning/TrainValidationSplit.scala @@ -25,7 +25,7 @@ import scala.language.existentials import org.apache.hadoop.fs.Path import org.json4s.DefaultFormats -import org.apache.spark.annotation.{Experimental, Since} +import org.apache.spark.annotation.Since import org.apache.spark.internal.Logging import org.apache.spark.ml.{Estimator, Model} import org.apache.spark.ml.evaluation.Evaluator @@ -54,14 +54,12 @@ private[ml] trait TrainValidationSplitParams extends ValidatorParams { } /** - * :: Experimental :: * Validation for hyper-parameter tuning. * Randomly splits the input dataset into train and validation sets, * and uses evaluation metric on the validation set to select the best model. 
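`CrossValidator` and `ParamGridBuilder` graduate together in these hunks and are almost always used together. A combined sketch; `training` is here assumed to carry a binary label column alongside features:

```scala
import org.apache.spark.ml.classification.LogisticRegression
import org.apache.spark.ml.evaluation.BinaryClassificationEvaluator
import org.apache.spark.ml.tuning.{CrossValidator, ParamGridBuilder}

val lr = new LogisticRegression()

// 2 x 2 grid: four candidate param maps.
val grid = new ParamGridBuilder()
  .addGrid(lr.regParam, Array(0.01, 0.1))
  .addGrid(lr.elasticNetParam, Array(0.0, 1.0))
  .build()

val cv = new CrossValidator()
  .setEstimator(lr)
  .setEstimatorParamMaps(grid)
  .setEvaluator(new BinaryClassificationEvaluator())
  .setNumFolds(3)

val cvModel = cv.fit(training)              // CrossValidatorModel wrapping the best model
println(cvModel.avgMetrics.mkString(", "))  // one averaged metric per param map
```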
* Similar to [[CrossValidator]], but only splits the set once. */ @Since("1.5.0") -@Experimental class TrainValidationSplit @Since("1.5.0") (@Since("1.5.0") override val uid: String) extends Estimator[TrainValidationSplitModel] with TrainValidationSplitParams with MLWritable with Logging { @@ -188,7 +186,6 @@ object TrainValidationSplit extends MLReadable[TrainValidationSplit] { } /** - * :: Experimental :: * Model from train validation split. * * @param uid Id. @@ -196,7 +193,6 @@ object TrainValidationSplit extends MLReadable[TrainValidationSplit] { * @param validationMetrics Evaluated validation metrics. */ @Since("1.5.0") -@Experimental class TrainValidationSplitModel private[ml] ( @Since("1.5.0") override val uid: String, @Since("1.5.0") val bestModel: Model[_], diff --git a/mllib/src/main/scala/org/apache/spark/mllib/clustering/BisectingKMeans.scala b/mllib/src/main/scala/org/apache/spark/mllib/clustering/BisectingKMeans.scala index 91edcf2a7925..f1664ce4ab3f 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/clustering/BisectingKMeans.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/clustering/BisectingKMeans.scala @@ -22,7 +22,7 @@ import java.util.Random import scala.annotation.tailrec import scala.collection.mutable -import org.apache.spark.annotation.{Experimental, Since} +import org.apache.spark.annotation.Since import org.apache.spark.api.java.JavaRDD import org.apache.spark.internal.Logging import org.apache.spark.mllib.linalg.{BLAS, Vector, Vectors} @@ -31,8 +31,6 @@ import org.apache.spark.rdd.RDD import org.apache.spark.storage.StorageLevel /** - * :: Experimental :: - * * A bisecting k-means algorithm based on the paper "A comparison of document clustering techniques" * by Steinbach, Karypis, and Kumar, with modification to fit Spark. * The algorithm starts from a single cluster that contains all points. @@ -54,7 +52,6 @@ import org.apache.spark.storage.StorageLevel * KDD Workshop on Text Mining, 2000.]] */ @Since("1.6.0") -@Experimental class BisectingKMeans private ( private var k: Int, private var maxIterations: Int, @@ -398,8 +395,6 @@ private object BisectingKMeans extends Serializable { } /** - * :: Experimental :: - * * Represents a node in a clustering tree. * * @param index node index, negative for internal nodes and non-negative for leaf nodes @@ -411,7 +406,6 @@ private object BisectingKMeans extends Serializable { * @param children children nodes */ @Since("1.6.0") -@Experimental private[clustering] class ClusteringTreeNode private[clustering] ( val index: Int, val size: Long, diff --git a/mllib/src/main/scala/org/apache/spark/mllib/clustering/BisectingKMeansModel.scala b/mllib/src/main/scala/org/apache/spark/mllib/clustering/BisectingKMeansModel.scala index 11fd940b8b20..8438015ccece 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/clustering/BisectingKMeansModel.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/clustering/BisectingKMeansModel.scala @@ -23,7 +23,7 @@ import org.json4s.jackson.JsonMethods._ import org.json4s.JsonDSL._ import org.apache.spark.SparkContext -import org.apache.spark.annotation.{Experimental, Since} +import org.apache.spark.annotation.Since import org.apache.spark.api.java.JavaRDD import org.apache.spark.internal.Logging import org.apache.spark.mllib.linalg.Vector @@ -32,8 +32,6 @@ import org.apache.spark.rdd.RDD import org.apache.spark.sql.{Row, SparkSession} /** - * :: Experimental :: - * * Clustering model produced by [[BisectingKMeans]]. 
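`TrainValidationSplit` follows the same tuning pattern with a single random split instead of k folds; a sketch under the same assumptions:

```scala
import org.apache.spark.ml.evaluation.RegressionEvaluator
import org.apache.spark.ml.regression.LinearRegression
import org.apache.spark.ml.tuning.{ParamGridBuilder, TrainValidationSplit}

val lr = new LinearRegression()
val grid = new ParamGridBuilder()
  .addGrid(lr.regParam, Array(0.01, 0.1, 1.0))
  .build()

val tvs = new TrainValidationSplit()
  .setEstimator(lr)
  .setEstimatorParamMaps(grid)
  .setEvaluator(new RegressionEvaluator())
  .setTrainRatio(0.75)   // 75% train, 25% validation

val tvsModel = tvs.fit(training)
println(tvsModel.validationMetrics.mkString(", "))  // one metric per param map
```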
* The prediction is done level-by-level from the root node to a leaf node, and at each node among * its children the closest to the input point is selected. @@ -41,7 +39,6 @@ import org.apache.spark.sql.{Row, SparkSession} * @param root the root node of the clustering tree */ @Since("1.6.0") -@Experimental class BisectingKMeansModel private[clustering] ( private[clustering] val root: ClusteringTreeNode ) extends Serializable with Saveable with Logging { diff --git a/mllib/src/main/scala/org/apache/spark/mllib/clustering/LDAModel.scala b/mllib/src/main/scala/org/apache/spark/mllib/clustering/LDAModel.scala index d29582630041..9ebba1de0dad 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/clustering/LDAModel.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/clustering/LDAModel.scala @@ -25,7 +25,7 @@ import org.json4s.JsonDSL._ import org.json4s.jackson.JsonMethods._ import org.apache.spark.SparkContext -import org.apache.spark.annotation.{Experimental, Since} +import org.apache.spark.annotation.Since import org.apache.spark.api.java.{JavaPairRDD, JavaRDD} import org.apache.spark.graphx.{Edge, EdgeContext, Graph, VertexId} import org.apache.spark.mllib.linalg.{Matrices, Matrix, Vector, Vectors} @@ -426,13 +426,10 @@ class LocalLDAModel private[spark] ( } /** - * :: Experimental :: - * * Local (non-distributed) model fitted by [[LDA]]. * * This model stores the inferred topics only; it does not store info about the training dataset. */ -@Experimental @Since("1.5.0") object LocalLDAModel extends Loader[LocalLDAModel] { @@ -822,15 +819,12 @@ class DistributedLDAModel private[clustering] ( } /** - * :: Experimental :: - * * Distributed model fitted by [[LDA]]. * This type of model is currently only produced by Expectation-Maximization (EM). * * This model stores the inferred topics, the full training dataset, and the topic distribution * for each training document. */ -@Experimental @Since("1.5.0") object DistributedLDAModel extends Loader[DistributedLDAModel] { diff --git a/mllib/src/main/scala/org/apache/spark/mllib/fpm/AssociationRules.scala b/mllib/src/main/scala/org/apache/spark/mllib/fpm/AssociationRules.scala index 9a63cc29dacb..3c26d2670841 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/fpm/AssociationRules.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/fpm/AssociationRules.scala @@ -19,7 +19,7 @@ package org.apache.spark.mllib.fpm import scala.collection.JavaConverters._ import scala.reflect.ClassTag -import org.apache.spark.annotation.{Experimental, Since} +import org.apache.spark.annotation.Since import org.apache.spark.api.java.JavaRDD import org.apache.spark.api.java.JavaSparkContext.fakeClassTag import org.apache.spark.internal.Logging @@ -28,14 +28,11 @@ import org.apache.spark.mllib.fpm.FPGrowth.FreqItemset import org.apache.spark.rdd.RDD /** - * :: Experimental :: - * * Generates association rules from a [[RDD[FreqItemset[Item]]]. This method only generates * association rules which have a single item as the consequent. * */ @Since("1.5.0") -@Experimental class AssociationRules private[fpm] ( private var minConfidence: Double) extends Logging with Serializable { @@ -95,8 +92,6 @@ class AssociationRules private[fpm] ( object AssociationRules { /** - * :: Experimental :: - * * An association rule between sets of items. * @param antecedent hypotheses of the rule. Java users should call [[Rule#javaAntecedent]] * instead. 
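`Rule` instances are normally produced from FPGrowth's frequent itemsets rather than constructed directly; `FPGrowthModel.generateAssociationRules` delegates to `AssociationRules` internally. A sketch, with `sc` an assumed SparkContext and four toy transactions:

```scala
import org.apache.spark.mllib.fpm.FPGrowth

val transactions = sc.parallelize(Seq(
  Array("a", "b", "c"),
  Array("a", "b"),
  Array("a", "c"),
  Array("b", "c")
))

val fpModel = new FPGrowth()
  .setMinSupport(0.5)
  .run(transactions)

// Single-consequent rules with confidence >= 0.6.
fpModel.generateAssociationRules(0.6).collect().foreach { rule =>
  println(s"${rule.antecedent.mkString(",")} => ${rule.consequent.mkString(",")} @ ${rule.confidence}")
}
```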
@@ -106,7 +101,6 @@ object AssociationRules { * */ @Since("1.5.0") - @Experimental class Rule[Item] private[fpm] ( @Since("1.5.0") val antecedent: Array[Item], @Since("1.5.0") val consequent: Array[Item], diff --git a/mllib/src/main/scala/org/apache/spark/mllib/fpm/PrefixSpan.scala b/mllib/src/main/scala/org/apache/spark/mllib/fpm/PrefixSpan.scala index c13c794775fe..7382000791cf 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/fpm/PrefixSpan.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/fpm/PrefixSpan.scala @@ -30,7 +30,7 @@ import org.json4s.JsonDSL._ import org.json4s.jackson.JsonMethods.{compact, render} import org.apache.spark.SparkContext -import org.apache.spark.annotation.{Experimental, Since} +import org.apache.spark.annotation.Since import org.apache.spark.api.java.JavaRDD import org.apache.spark.api.java.JavaSparkContext.fakeClassTag import org.apache.spark.internal.Logging @@ -42,8 +42,6 @@ import org.apache.spark.sql.types._ import org.apache.spark.storage.StorageLevel /** - * :: Experimental :: - * * A parallel PrefixSpan algorithm to mine frequent sequential patterns. * The PrefixSpan algorithm is described in J. Pei, et al., PrefixSpan: Mining Sequential Patterns * Efficiently by Prefix-Projected Pattern Growth ([[http://doi.org/10.1109/ICDE.2001.914830]]). @@ -60,7 +58,6 @@ import org.apache.spark.storage.StorageLevel * @see [[https://en.wikipedia.org/wiki/Sequential_Pattern_Mining Sequential Pattern Mining * (Wikipedia)]] */ -@Experimental @Since("1.5.0") class PrefixSpan private ( private var minSupport: Double, @@ -230,7 +227,6 @@ class PrefixSpan private ( } -@Experimental @Since("1.5.0") object PrefixSpan extends Logging { diff --git a/mllib/src/main/scala/org/apache/spark/mllib/linalg/SingularValueDecomposition.scala b/mllib/src/main/scala/org/apache/spark/mllib/linalg/SingularValueDecomposition.scala index 4591cb88ef15..8024b1c0031f 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/linalg/SingularValueDecomposition.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/linalg/SingularValueDecomposition.scala @@ -17,7 +17,7 @@ package org.apache.spark.mllib.linalg -import org.apache.spark.annotation.{Experimental, Since} +import org.apache.spark.annotation.Since /** * Represents singular value decomposition (SVD) factors. @@ -26,10 +26,8 @@ import org.apache.spark.annotation.{Experimental, Since} case class SingularValueDecomposition[UType, VType](U: UType, s: Vector, V: VType) /** - * :: Experimental :: * Represents QR factors. */ @Since("1.5.0") -@Experimental case class QRDecomposition[QType, RType](Q: QType, R: RType) diff --git a/mllib/src/main/scala/org/apache/spark/mllib/optimization/GradientDescent.scala b/mllib/src/main/scala/org/apache/spark/mllib/optimization/GradientDescent.scala index 480a64548cb7..f37235500565 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/optimization/GradientDescent.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/optimization/GradientDescent.scala @@ -21,7 +21,7 @@ import scala.collection.mutable.ArrayBuffer import breeze.linalg.{norm, DenseVector => BDV} -import org.apache.spark.annotation.{DeveloperApi, Experimental} +import org.apache.spark.annotation.DeveloperApi import org.apache.spark.internal.Logging import org.apache.spark.mllib.linalg.{Vector, Vectors} import org.apache.spark.rdd.RDD @@ -53,11 +53,9 @@ class GradientDescent private[spark] (private var gradient: Gradient, private va } /** - * :: Experimental :: * Set fraction of data to be used for each SGD iteration. 
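For the PrefixSpan hunks, a minimal mining sketch; `sc` is an assumed SparkContext and the sequences are toy data (each sequence is an array of itemsets, each itemset an array of items):

```scala
import org.apache.spark.mllib.fpm.PrefixSpan

val sequences = sc.parallelize(Seq(
  Array(Array(1, 2), Array(3)),
  Array(Array(1), Array(3, 2), Array(1, 2)),
  Array(Array(1, 2), Array(5)),
  Array(Array(6))
), 2).cache()

val psModel = new PrefixSpan()
  .setMinSupport(0.5)
  .setMaxPatternLength(5)
  .run(sequences)

psModel.freqSequences.collect().foreach { fs =>
  println(s"${fs.sequence.map(_.mkString("[", ",", "]")).mkString("")} : ${fs.freq}")
}
```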
* Default 1.0 (corresponding to deterministic/classical gradient descent) */ - @Experimental def setMiniBatchFraction(fraction: Double): this.type = { require(fraction > 0 && fraction <= 1.0, s"Fraction for mini-batch SGD must be in range (0, 1] but got ${fraction}") diff --git a/mllib/src/main/scala/org/apache/spark/mllib/pmml/PMMLExportable.scala b/mllib/src/main/scala/org/apache/spark/mllib/pmml/PMMLExportable.scala index 274ac7c99553..5d61796f1de6 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/pmml/PMMLExportable.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/pmml/PMMLExportable.scala @@ -23,7 +23,7 @@ import javax.xml.transform.stream.StreamResult import org.jpmml.model.JAXBUtil import org.apache.spark.SparkContext -import org.apache.spark.annotation.{DeveloperApi, Experimental, Since} +import org.apache.spark.annotation.{DeveloperApi, Since} import org.apache.spark.mllib.pmml.export.PMMLModelExportFactory /** @@ -45,20 +45,16 @@ trait PMMLExportable { } /** - * :: Experimental :: * Export the model to a local file in PMML format */ - @Experimental @Since("1.4.0") def toPMML(localPath: String): Unit = { toPMML(new StreamResult(new File(localPath))) } /** - * :: Experimental :: * Export the model to a directory on a distributed file system in PMML format */ - @Experimental @Since("1.4.0") def toPMML(sc: SparkContext, path: String): Unit = { val pmml = toPMML() @@ -66,20 +62,16 @@ trait PMMLExportable { } /** - * :: Experimental :: * Export the model to the OutputStream in PMML format */ - @Experimental @Since("1.4.0") def toPMML(outputStream: OutputStream): Unit = { toPMML(new StreamResult(outputStream)) } /** - * :: Experimental :: * Export the model to a String in PMML format */ - @Experimental @Since("1.4.0") def toPMML(): String = { val writer = new StringWriter diff --git a/mllib/src/main/scala/org/apache/spark/mllib/stat/test/StreamingTest.scala b/mllib/src/main/scala/org/apache/spark/mllib/stat/test/StreamingTest.scala index 4c382d7c2b79..97c032de7a81 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/stat/test/StreamingTest.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/stat/test/StreamingTest.scala @@ -19,7 +19,7 @@ package org.apache.spark.mllib.stat.test import scala.beans.BeanInfo -import org.apache.spark.annotation.{Experimental, Since} +import org.apache.spark.annotation.Since import org.apache.spark.internal.Logging import org.apache.spark.streaming.api.java.JavaDStream import org.apache.spark.streaming.dstream.DStream @@ -42,7 +42,6 @@ case class BinarySample @Since("1.6.0") ( } /** - * :: Experimental :: * Performs online 2-sample significance testing for a stream of (Boolean, Double) pairs. The * Boolean identifies which sample each observation comes from, and the Double is the numeric value * of the observation. 
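The `toPMML` overloads shown in the PMMLExportable hunk are mixed into several mllib models; `KMeansModel` is one of them, so a sketch using it (with `sc` assumed and the /tmp paths purely illustrative):

```scala
import org.apache.spark.mllib.clustering.KMeans
import org.apache.spark.mllib.linalg.Vectors

val points = sc.parallelize(Seq(
  Vectors.dense(0.0, 0.0), Vectors.dense(1.0, 1.0),
  Vectors.dense(9.0, 8.0), Vectors.dense(8.0, 9.0)
))
val kmeansModel = KMeans.train(points, 2, 10)  // k = 2, maxIterations = 10

val pmml: String = kmeansModel.toPMML()        // in-memory String
kmeansModel.toPMML("/tmp/kmeans.xml")          // local file
kmeansModel.toPMML(sc, "/tmp/kmeans-pmml")     // distributed filesystem path
```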
@@ -67,7 +66,6 @@ case class BinarySample @Since("1.6.0") ( * .registerStream(DStream) * }}} */ -@Experimental @Since("1.6.0") class StreamingTest @Since("1.6.0") () extends Logging with Serializable { private var peacePeriod: Int = 0 diff --git a/mllib/src/main/scala/org/apache/spark/mllib/stat/test/TestResult.scala b/mllib/src/main/scala/org/apache/spark/mllib/stat/test/TestResult.scala index 8a29fd39a910..5cfc05a3dd2d 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/stat/test/TestResult.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/stat/test/TestResult.scala @@ -17,7 +17,7 @@ package org.apache.spark.mllib.stat.test -import org.apache.spark.annotation.{Experimental, Since} +import org.apache.spark.annotation.Since /** * Trait for hypothesis test results. @@ -94,10 +94,8 @@ class ChiSqTestResult private[stat] (override val pValue: Double, } /** - * :: Experimental :: * Object containing the test results for the Kolmogorov-Smirnov test. */ -@Experimental @Since("1.5.0") class KolmogorovSmirnovTestResult private[stat] ( @Since("1.5.0") override val pValue: Double, @@ -113,10 +111,8 @@ class KolmogorovSmirnovTestResult private[stat] ( } /** - * :: Experimental :: * Object containing the test results for streaming testing. */ -@Experimental @Since("1.6.0") private[stat] class StreamingTestResult @Since("1.6.0") ( @Since("1.6.0") override val pValue: Double, diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/configuration/Algo.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/configuration/Algo.scala index 853c7319ec44..2436ce40866e 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/tree/configuration/Algo.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/configuration/Algo.scala @@ -17,14 +17,12 @@ package org.apache.spark.mllib.tree.configuration -import org.apache.spark.annotation.{Experimental, Since} +import org.apache.spark.annotation.Since /** - * :: Experimental :: * Enum to select the algorithm for the decision tree */ @Since("1.0.0") -@Experimental object Algo extends Enumeration { @Since("1.0.0") type Algo = Value diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/impurity/Entropy.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/impurity/Entropy.scala index 3a731f45d6a0..d4448da9eef5 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/tree/impurity/Entropy.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/impurity/Entropy.scala @@ -17,14 +17,12 @@ package org.apache.spark.mllib.tree.impurity -import org.apache.spark.annotation.{DeveloperApi, Experimental, Since} +import org.apache.spark.annotation.{DeveloperApi, Since} /** - * :: Experimental :: * Class for calculating entropy during multiclass classification. 
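`KolmogorovSmirnovTestResult` values, touched in the TestResult hunk above, are produced by `Statistics.kolmogorovSmirnovTest`. A sketch, with `sc` assumed and the five samples illustrative:

```scala
import org.apache.spark.mllib.stat.Statistics
import org.apache.spark.rdd.RDD

val data: RDD[Double] = sc.parallelize(Seq(0.1, 0.15, 0.2, 0.3, 0.25))

// One-sample, two-sided KS test against a standard normal N(0, 1).
val ksResult = Statistics.kolmogorovSmirnovTest(data, "norm", 0.0, 1.0)
println(ksResult.statistic)
println(ksResult.pValue)
println(ksResult.nullHypothesis)
```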
*/ @Since("1.0.0") -@Experimental object Entropy extends Impurity { private[tree] def log2(x: Double) = scala.math.log(x) / scala.math.log(2) diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/impurity/Gini.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/impurity/Gini.scala index 7730c0a8c111..22e70278a665 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/tree/impurity/Gini.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/impurity/Gini.scala @@ -17,16 +17,14 @@ package org.apache.spark.mllib.tree.impurity -import org.apache.spark.annotation.{DeveloperApi, Experimental, Since} +import org.apache.spark.annotation.{DeveloperApi, Since} /** - * :: Experimental :: * Class for calculating the * [[http://en.wikipedia.org/wiki/Decision_tree_learning#Gini_impurity Gini impurity]] * during multiclass classification. */ @Since("1.0.0") -@Experimental object Gini extends Impurity { /** diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/impurity/Impurity.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/impurity/Impurity.scala index 65f0163ec605..a5bdc2c6d2c9 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/tree/impurity/Impurity.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/impurity/Impurity.scala @@ -17,17 +17,15 @@ package org.apache.spark.mllib.tree.impurity -import org.apache.spark.annotation.{DeveloperApi, Experimental, Since} +import org.apache.spark.annotation.{DeveloperApi, Since} /** - * :: Experimental :: * Trait for calculating information gain. * This trait is used for * (a) setting the impurity parameter in [[org.apache.spark.mllib.tree.configuration.Strategy]] * (b) calculating impurity values from sufficient statistics. */ @Since("1.0.0") -@Experimental trait Impurity extends Serializable { /** diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/impurity/Variance.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/impurity/Variance.scala index 2423516123b8..c9bf0db4de3c 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/tree/impurity/Variance.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/impurity/Variance.scala @@ -17,14 +17,12 @@ package org.apache.spark.mllib.tree.impurity -import org.apache.spark.annotation.{DeveloperApi, Experimental, Since} +import org.apache.spark.annotation.{DeveloperApi, Since} /** - * :: Experimental :: * Class for calculating variance during regression */ @Since("1.0.0") -@Experimental object Variance extends Impurity { /** diff --git a/python/pyspark/ml/classification.py b/python/pyspark/ml/classification.py index c035942f7386..3c4af90acac8 100644 --- a/python/pyspark/ml/classification.py +++ b/python/pyspark/ml/classification.py @@ -49,8 +49,6 @@ class LogisticRegression(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredicti HasElasticNetParam, HasFitIntercept, HasStandardization, HasThresholds, HasWeightCol, JavaMLWritable, JavaMLReadable): """ - .. note:: Experimental - Logistic regression. Currently, this class only supports binary classification. @@ -216,8 +214,6 @@ def _checkThresholdConsistency(self): class LogisticRegressionModel(JavaModel, JavaMLWritable, JavaMLReadable): """ - .. note:: Experimental - Model fitted by LogisticRegression. .. versionadded:: 1.3.0 @@ -277,6 +273,8 @@ def evaluate(self, dataset): class LogisticRegressionSummary(JavaWrapper): """ + .. note:: Experimental + Abstraction for Logistic Regression Results for a given model. .. 
versionadded:: 2.0.0 @@ -321,6 +319,8 @@ def featuresCol(self): @inherit_doc class LogisticRegressionTrainingSummary(LogisticRegressionSummary): """ + .. note:: Experimental + Abstraction for multinomial Logistic Regression Training results. Currently, the training summary ignores the training weights except for the objective trace. @@ -501,8 +501,6 @@ class DecisionTreeClassifier(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPred TreeClassifierParams, HasCheckpointInterval, HasSeed, JavaMLWritable, JavaMLReadable): """ - .. note:: Experimental - `Decision tree `_ learning algorithm for classification. It supports both binary and multiclass labels, as well as both continuous and categorical @@ -599,8 +597,6 @@ def _create_model(self, java_model): @inherit_doc class DecisionTreeClassificationModel(DecisionTreeModel, JavaMLWritable, JavaMLReadable): """ - .. note:: Experimental - Model fitted by DecisionTreeClassifier. .. versionadded:: 1.4.0 @@ -634,8 +630,6 @@ class RandomForestClassifier(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPred RandomForestParams, TreeClassifierParams, HasCheckpointInterval, JavaMLWritable, JavaMLReadable): """ - .. note:: Experimental - `Random Forest `_ learning algorithm for classification. It supports both binary and multiclass labels, as well as both continuous and categorical @@ -730,8 +724,6 @@ def _create_model(self, java_model): class RandomForestClassificationModel(TreeEnsembleModels, JavaMLWritable, JavaMLReadable): """ - .. note:: Experimental - Model fitted by RandomForestClassifier. .. versionadded:: 1.4.0 @@ -764,8 +756,6 @@ class GBTClassifier(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredictionCol GBTParams, HasCheckpointInterval, HasStepSize, HasSeed, JavaMLWritable, JavaMLReadable): """ - .. note:: Experimental - `Gradient-Boosted Trees (GBTs) `_ learning algorithm for classification. It supports binary labels, as well as both continuous and categorical features. @@ -885,8 +875,6 @@ def getLossType(self): class GBTClassificationModel(TreeEnsembleModels, JavaMLWritable, JavaMLReadable): """ - .. note:: Experimental - Model fitted by GBTClassifier. .. versionadded:: 1.4.0 @@ -918,8 +906,6 @@ def trees(self): class NaiveBayes(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredictionCol, HasProbabilityCol, HasRawPredictionCol, HasThresholds, JavaMLWritable, JavaMLReadable): """ - .. note:: Experimental - Naive Bayes Classifiers. It supports both Multinomial and Bernoulli NB. `Multinomial NB `_ @@ -1043,8 +1029,6 @@ def getModelType(self): class NaiveBayesModel(JavaModel, JavaMLWritable, JavaMLReadable): """ - .. note:: Experimental - Model fitted by NaiveBayes. .. versionadded:: 1.5.0 diff --git a/python/pyspark/ml/feature.py b/python/pyspark/ml/feature.py index bbbb94f9a0a0..2881380152c8 100755 --- a/python/pyspark/ml/feature.py +++ b/python/pyspark/ml/feature.py @@ -60,8 +60,6 @@ @inherit_doc class Binarizer(JavaTransformer, HasInputCol, HasOutputCol, JavaMLReadable, JavaMLWritable): """ - .. note:: Experimental - Binarize a column of continuous features given a threshold. >>> df = spark.createDataFrame([(0.5,)], ["values"]) @@ -125,8 +123,6 @@ def getThreshold(self): @inherit_doc class Bucketizer(JavaTransformer, HasInputCol, HasOutputCol, JavaMLReadable, JavaMLWritable): """ - .. note:: Experimental - Maps a column of continuous features to a column of feature buckets. 
>>> df = spark.createDataFrame([(0.1,), (0.4,), (1.2,), (1.5,)], ["values"]) @@ -200,8 +196,6 @@ def getSplits(self): @inherit_doc class CountVectorizer(JavaEstimator, HasInputCol, HasOutputCol, JavaMLReadable, JavaMLWritable): """ - .. note:: Experimental - Extracts a vocabulary from document collections and generates a :py:attr:`CountVectorizerModel`. >>> df = spark.createDataFrame( @@ -348,8 +342,6 @@ def _create_model(self, java_model): class CountVectorizerModel(JavaModel, JavaMLReadable, JavaMLWritable): """ - .. note:: Experimental - Model fitted by :py:class:`CountVectorizer`. .. versionadded:: 1.6.0 @@ -367,8 +359,6 @@ def vocabulary(self): @inherit_doc class DCT(JavaTransformer, HasInputCol, HasOutputCol, JavaMLReadable, JavaMLWritable): """ - .. note:: Experimental - A feature transformer that takes the 1D discrete cosine transform of a real vector. No zero padding is performed on the input vector. It returns a real vector of the same length representing the DCT. @@ -439,8 +429,6 @@ def getInverse(self): class ElementwiseProduct(JavaTransformer, HasInputCol, HasOutputCol, JavaMLReadable, JavaMLWritable): """ - .. note:: Experimental - Outputs the Hadamard product (i.e., the element-wise product) of each input vector with a provided "weight" vector. In other words, it scales each column of the dataset by a scalar multiplier. @@ -505,8 +493,6 @@ def getScalingVec(self): class HashingTF(JavaTransformer, HasInputCol, HasOutputCol, HasNumFeatures, JavaMLReadable, JavaMLWritable): """ - .. note:: Experimental - Maps a sequence of terms to their term frequencies using the hashing trick. Currently we use Austin Appleby's MurmurHash 3 algorithm (MurmurHash3_x86_32) to calculate the hash code value for the term object. @@ -576,8 +562,6 @@ def getBinary(self): @inherit_doc class IDF(JavaEstimator, HasInputCol, HasOutputCol, JavaMLReadable, JavaMLWritable): """ - .. note:: Experimental - Compute the Inverse Document Frequency (IDF) given a collection of documents. >>> from pyspark.ml.linalg import DenseVector @@ -653,8 +637,6 @@ def _create_model(self, java_model): class IDFModel(JavaModel, JavaMLReadable, JavaMLWritable): """ - .. note:: Experimental - Model fitted by :py:class:`IDF`. .. versionadded:: 1.4.0 @@ -752,8 +734,6 @@ def maxAbs(self): @inherit_doc class MinMaxScaler(JavaEstimator, HasInputCol, HasOutputCol, JavaMLReadable, JavaMLWritable): """ - .. note:: Experimental - Rescale each feature individually to a common range [min, max] linearly using column summary statistics, which is also known as min-max normalization or Rescaling. The rescaled value for feature E is calculated as, @@ -859,8 +839,6 @@ def _create_model(self, java_model): class MinMaxScalerModel(JavaModel, JavaMLReadable, JavaMLWritable): """ - .. note:: Experimental - Model fitted by :py:class:`MinMaxScaler`. .. versionadded:: 1.6.0 @@ -887,8 +865,6 @@ def originalMax(self): @ignore_unicode_prefix class NGram(JavaTransformer, HasInputCol, HasOutputCol, JavaMLReadable, JavaMLWritable): """ - .. note:: Experimental - A feature transformer that converts the input array of strings into an array of n-grams. Null values in the input array are ignored. It returns an array of n-grams where each n-gram is represented by a space-separated string of @@ -965,8 +941,6 @@ def getN(self): @inherit_doc class Normalizer(JavaTransformer, HasInputCol, HasOutputCol, JavaMLReadable, JavaMLWritable): """ - .. note:: Experimental - Normalize a vector to have unit norm using the given p-norm. 
>>> from pyspark.ml.linalg import Vectors @@ -1031,8 +1005,6 @@ def getP(self): @inherit_doc class OneHotEncoder(JavaTransformer, HasInputCol, HasOutputCol, JavaMLReadable, JavaMLWritable): """ - .. note:: Experimental - A one-hot encoder that maps a column of category indices to a column of binary vectors, with at most a single one-value per row that indicates the input category index. @@ -1114,8 +1086,6 @@ def getDropLast(self): class PolynomialExpansion(JavaTransformer, HasInputCol, HasOutputCol, JavaMLReadable, JavaMLWritable): """ - .. note:: Experimental - Perform feature expansion in a polynomial space. As said in `wikipedia of Polynomial Expansion `_, "In mathematics, an expansion of a product of sums expresses it as a sum of products by using the fact that @@ -1287,8 +1257,6 @@ def _create_model(self, java_model): @ignore_unicode_prefix class RegexTokenizer(JavaTransformer, HasInputCol, HasOutputCol, JavaMLReadable, JavaMLWritable): """ - .. note:: Experimental - A regex based tokenizer that extracts tokens either by using the provided regex pattern (in Java dialect) to split the text (default) or repeatedly matching the regex (if gaps is false). @@ -1418,8 +1386,6 @@ def getToLowercase(self): @inherit_doc class SQLTransformer(JavaTransformer, JavaMLReadable, JavaMLWritable): """ - .. note:: Experimental - Implements the transforms which are defined by SQL statement. Currently we only support SQL syntax like 'SELECT ... FROM __THIS__' where '__THIS__' represents the underlying table of the input dataset. @@ -1479,8 +1445,6 @@ def getStatement(self): @inherit_doc class StandardScaler(JavaEstimator, HasInputCol, HasOutputCol, JavaMLReadable, JavaMLWritable): """ - .. note:: Experimental - Standardizes features by removing the mean and scaling to unit variance using column summary statistics on the samples in the training set. @@ -1576,8 +1540,6 @@ def _create_model(self, java_model): class StandardScalerModel(JavaModel, JavaMLReadable, JavaMLWritable): """ - .. note:: Experimental - Model fitted by :py:class:`StandardScaler`. .. versionadded:: 1.4.0 @@ -1604,8 +1566,6 @@ def mean(self): class StringIndexer(JavaEstimator, HasInputCol, HasOutputCol, HasHandleInvalid, JavaMLReadable, JavaMLWritable): """ - .. note:: Experimental - A label indexer that maps a string column of labels to an ML column of label indices. If the input column is numeric, we cast it to string and index the string values. The indices are in [0, numLabels), ordered by label frequencies. @@ -1668,8 +1628,6 @@ def _create_model(self, java_model): class StringIndexerModel(JavaModel, JavaMLReadable, JavaMLWritable): """ - .. note:: Experimental - Model fitted by :py:class:`StringIndexer`. .. versionadded:: 1.4.0 @@ -1687,8 +1645,6 @@ def labels(self): @inherit_doc class IndexToString(JavaTransformer, HasInputCol, HasOutputCol, JavaMLReadable, JavaMLWritable): """ - .. note:: Experimental - A :py:class:`Transformer` that maps a column of indices back to a new column of corresponding string values. The index-string mapping is either from the ML attributes of the input column, @@ -1741,8 +1697,6 @@ def getLabels(self): class StopWordsRemover(JavaTransformer, HasInputCol, HasOutputCol, JavaMLReadable, JavaMLWritable): """ - .. note:: Experimental - A feature transformer that filters out stop words from input. Note: null values from input array are preserved unless adding null to stopWords explicitly. 
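The transformers in this file mirror Scala classes that this change set also stabilizes. To keep the added examples in one language, a Scala sketch of chaining two of them (`StringIndexer` feeding `OneHotEncoder`) inside a `Pipeline`; `spark` is an assumed SparkSession:

```scala
import org.apache.spark.ml.Pipeline
import org.apache.spark.ml.feature.{OneHotEncoder, StringIndexer}

val df = spark.createDataFrame(Seq(
  (0, "a"), (1, "b"), (2, "c"), (3, "a"), (4, "a")
)).toDF("id", "category")

// Index the string labels, then expand the indices into sparse binary vectors.
val indexer = new StringIndexer()
  .setInputCol("category")
  .setOutputCol("categoryIndex")
val encoder = new OneHotEncoder()
  .setInputCol("categoryIndex")
  .setOutputCol("categoryVec")

val encoded = new Pipeline()
  .setStages(Array(indexer, encoder))
  .fit(df)
  .transform(df)

encoded.select("category", "categoryVec").show()
```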
@@ -1833,8 +1787,6 @@ def loadDefaultStopWords(language): @ignore_unicode_prefix class Tokenizer(JavaTransformer, HasInputCol, HasOutputCol, JavaMLReadable, JavaMLWritable): """ - .. note:: Experimental - A tokenizer that converts the input string to lowercase and then splits it by white spaces. @@ -1888,8 +1840,6 @@ def setParams(self, inputCol=None, outputCol=None): @inherit_doc class VectorAssembler(JavaTransformer, HasInputCols, HasOutputCol, JavaMLReadable, JavaMLWritable): """ - .. note:: Experimental - A feature transformer that merges multiple columns into a vector column. >>> df = spark.createDataFrame([(1, 0, 3)], ["a", "b", "c"]) @@ -1934,8 +1884,6 @@ def setParams(self, inputCols=None, outputCol=None): @inherit_doc class VectorIndexer(JavaEstimator, HasInputCol, HasOutputCol, JavaMLReadable, JavaMLWritable): """ - .. note:: Experimental - Class for indexing categorical feature columns in a dataset of `Vector`. This has 2 usage modes: @@ -2050,8 +1998,6 @@ def _create_model(self, java_model): class VectorIndexerModel(JavaModel, JavaMLReadable, JavaMLWritable): """ - .. note:: Experimental - Model fitted by :py:class:`VectorIndexer`. Transform categorical features to use 0-based indices instead of their original values. @@ -2089,8 +2035,6 @@ def categoryMaps(self): @inherit_doc class VectorSlicer(JavaTransformer, HasInputCol, HasOutputCol, JavaMLReadable, JavaMLWritable): """ - .. note:: Experimental - This class takes a feature vector and outputs a new feature vector with a subarray of the original features. @@ -2183,8 +2127,6 @@ def getNames(self): class Word2Vec(JavaEstimator, HasStepSize, HasMaxIter, HasSeed, HasInputCol, HasOutputCol, JavaMLReadable, JavaMLWritable): """ - .. note:: Experimental - Word2Vec trains a model of `Map(String, Vector)`, i.e. transforms a word into a code for further natural language processing or machine learning process. @@ -2352,8 +2294,6 @@ def _create_model(self, java_model): class Word2VecModel(JavaModel, JavaMLReadable, JavaMLWritable): """ - .. note:: Experimental - Model fitted by :py:class:`Word2Vec`. .. versionadded:: 1.4.0 @@ -2383,8 +2323,6 @@ def findSynonyms(self, word, num): @inherit_doc class PCA(JavaEstimator, HasInputCol, HasOutputCol, JavaMLReadable, JavaMLWritable): """ - .. note:: Experimental - PCA trains a model to project vectors to a lower dimensional space of the top :py:attr:`k` principal components. @@ -2458,8 +2396,6 @@ def _create_model(self, java_model): class PCAModel(JavaModel, JavaMLReadable, JavaMLWritable): """ - .. note:: Experimental - Model fitted by :py:class:`PCA`. Transforms vectors to a lower dimensional space. .. versionadded:: 1.5.0 diff --git a/python/pyspark/ml/regression.py b/python/pyspark/ml/regression.py index 8de9ad85311f..d88dc7535359 100644 --- a/python/pyspark/ml/regression.py +++ b/python/pyspark/ml/regression.py @@ -41,8 +41,6 @@ class LinearRegression(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPrediction HasRegParam, HasTol, HasElasticNetParam, HasFitIntercept, HasStandardization, HasSolver, HasWeightCol, JavaMLWritable, JavaMLReadable): """ - .. note:: Experimental - Linear regression. The learning objective is to minimize the squared error, with regularization. @@ -130,8 +128,6 @@ def _create_model(self, java_model): class LinearRegressionModel(JavaModel, JavaMLWritable, JavaMLReadable): """ - .. note:: Experimental - Model fitted by :class:`LinearRegression`. .. 
versionadded:: 1.4.0 @@ -411,8 +407,6 @@ def totalIterations(self): class IsotonicRegression(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredictionCol, HasWeightCol, JavaMLWritable, JavaMLReadable): """ - .. note:: Experimental - Currently implemented using parallelized pool adjacent violators algorithm. Only univariate (single feature) algorithm supported. @@ -439,6 +433,8 @@ class IsotonicRegression(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredicti True >>> model.predictions == model2.predictions True + + .. versionadded:: 1.6.0 """ isotonic = \ @@ -505,13 +501,13 @@ def getFeatureIndex(self): class IsotonicRegressionModel(JavaModel, JavaMLWritable, JavaMLReadable): """ - .. note:: Experimental - Model fitted by :class:`IsotonicRegression`. + + .. versionadded:: 1.6.0 """ @property - @since("2.0.0") + @since("1.6.0") def boundaries(self): """ Boundaries in increasing order for which predictions are known. @@ -519,7 +515,7 @@ def boundaries(self): return self._call_java("boundaries") @property - @since("2.0.0") + @since("1.6.0") def predictions(self): """ Predictions associated with the boundaries at the same index, monotone because of isotonic @@ -642,8 +638,6 @@ class DecisionTreeRegressor(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredi DecisionTreeParams, TreeRegressorParams, HasCheckpointInterval, HasSeed, JavaMLWritable, JavaMLReadable, HasVarianceCol): """ - .. note:: Experimental - `Decision tree `_ learning algorithm for regression. It supports both continuous and categorical features. @@ -727,8 +721,6 @@ def _create_model(self, java_model): @inherit_doc class DecisionTreeModel(JavaModel): """ - .. note:: Experimental - Abstraction for Decision Tree models. .. versionadded:: 1.5.0 @@ -759,11 +751,9 @@ def __repr__(self): @inherit_doc class TreeEnsembleModels(JavaModel): """ - .. note:: Experimental + (private abstraction) Represents a tree ensemble model. - - .. versionadded:: 1.5.0 """ @property @@ -803,8 +793,6 @@ def __repr__(self): @inherit_doc class DecisionTreeRegressionModel(DecisionTreeModel, JavaMLWritable, JavaMLReadable): """ - .. note:: Experimental - Model fitted by :class:`DecisionTreeRegressor`. .. versionadded:: 1.4.0 @@ -837,8 +825,6 @@ class RandomForestRegressor(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredi RandomForestParams, TreeRegressorParams, HasCheckpointInterval, JavaMLWritable, JavaMLReadable): """ - .. note:: Experimental - `Random Forest `_ learning algorithm for regression. It supports both continuous and categorical features. @@ -925,8 +911,6 @@ def _create_model(self, java_model): class RandomForestRegressionModel(TreeEnsembleModels, JavaMLWritable, JavaMLReadable): """ - .. note:: Experimental - Model fitted by :class:`RandomForestRegressor`. .. versionadded:: 1.4.0 @@ -959,8 +943,6 @@ class GBTRegressor(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredictionCol, GBTParams, HasCheckpointInterval, HasStepSize, HasSeed, JavaMLWritable, JavaMLReadable, TreeRegressorParams): """ - .. note:: Experimental - `Gradient-Boosted Trees (GBTs) `_ learning algorithm for regression. It supports both continuous and categorical features. @@ -1067,8 +1049,6 @@ def getLossType(self): class GBTRegressionModel(TreeEnsembleModels, JavaMLWritable, JavaMLReadable): """ - .. note:: Experimental - Model fitted by :class:`GBTRegressor`. .. 
versionadded:: 1.4.0 diff --git a/python/pyspark/ml/tuning.py b/python/pyspark/ml/tuning.py index f857c5e8c86b..298314d46caf 100644 --- a/python/pyspark/ml/tuning.py +++ b/python/pyspark/ml/tuning.py @@ -33,8 +33,6 @@ class ParamGridBuilder(object): r""" - .. note:: Experimental - Builder for a param grid used in grid search-based model selection. >>> from pyspark.ml.classification import LogisticRegression @@ -145,8 +143,6 @@ def getEvaluator(self): class CrossValidator(Estimator, ValidatorParams): """ - .. note:: Experimental - K-fold cross validation. >>> from pyspark.ml.classification import LogisticRegression @@ -264,8 +260,6 @@ def copy(self, extra=None): class CrossValidatorModel(Model, ValidatorParams): """ - .. note:: Experimental - Model from k-fold cross validation. .. versionadded:: 1.4.0 diff --git a/python/pyspark/mllib/classification.py b/python/pyspark/mllib/classification.py index 3734f87405e5..9f53ed098202 100644 --- a/python/pyspark/mllib/classification.py +++ b/python/pyspark/mllib/classification.py @@ -48,8 +48,6 @@ def __init__(self, weights, intercept): @since('1.4.0') def setThreshold(self, value): """ - .. note:: Experimental - Sets the threshold that separates positive predictions from negative predictions. An example with prediction score greater than or equal to this threshold is identified as a positive, @@ -62,8 +60,6 @@ def setThreshold(self, value): @since('1.4.0') def threshold(self): """ - .. note:: Experimental - Returns the threshold (if any) used for converting raw prediction scores into 0/1 predictions. It is used for binary classification only. @@ -73,8 +69,6 @@ def threshold(self): @since('1.4.0') def clearThreshold(self): """ - .. note:: Experimental - Clears the threshold so that `predict` will output raw prediction scores. It is used for binary classification only. """ diff --git a/python/pyspark/mllib/clustering.py b/python/pyspark/mllib/clustering.py index c38c543972d1..c8c3c42774f2 100644 --- a/python/pyspark/mllib/clustering.py +++ b/python/pyspark/mllib/clustering.py @@ -47,8 +47,6 @@ @inherit_doc class BisectingKMeansModel(JavaModelWrapper): """ - .. note:: Experimental - A clustering model derived from the bisecting k-means method. >>> data = array([0.0,0.0, 1.0,1.0, 9.0,8.0, 8.0,9.0]).reshape(4, 2) @@ -120,8 +118,6 @@ def computeCost(self, x): class BisectingKMeans(object): """ - .. note:: Experimental - A bisecting k-means algorithm based on the paper "A comparison of document clustering techniques" by Steinbach, Karypis, and Kumar, with modification to fit Spark. @@ -366,8 +362,6 @@ def train(cls, rdd, k, maxIterations=100, runs=1, initializationMode="k-means||" class GaussianMixtureModel(JavaModelWrapper, JavaSaveable, JavaLoader): """ - .. note:: Experimental - A clustering model derived from the Gaussian Mixture Model method. >>> from pyspark.mllib.linalg import Vectors, DenseMatrix @@ -513,8 +507,6 @@ def load(cls, sc, path): class GaussianMixture(object): """ - .. note:: Experimental - Learning algorithm for Gaussian Mixtures using the expectation-maximization algorithm. .. versionadded:: 1.3.0 @@ -565,8 +557,6 @@ def train(cls, rdd, k, convergenceTol=1e-3, maxIterations=100, seed=None, initia class PowerIterationClusteringModel(JavaModelWrapper, JavaSaveable, JavaLoader): """ - .. note:: Experimental - Model produced by [[PowerIterationClustering]]. >>> import math @@ -645,8 +635,6 @@ def load(cls, sc, path): class PowerIterationClustering(object): """ - .. 
note:: Experimental - Power Iteration Clustering (PIC), a scalable graph clustering algorithm developed by [[http://www.icml2010.org/papers/387.pdf Lin and Cohen]]. From the abstract: PIC finds a very low-dimensional embedding of a @@ -693,8 +681,6 @@ class Assignment(namedtuple("Assignment", ["id", "cluster"])): class StreamingKMeansModel(KMeansModel): """ - .. note:: Experimental - Clustering model which can perform an online update of the centroids. The update formula for each centroid is given by @@ -794,8 +780,6 @@ def update(self, data, decayFactor, timeUnit): class StreamingKMeans(object): """ - .. note:: Experimental - Provides methods to set k, decayFactor, timeUnit to configure the KMeans algorithm for fitting and predicting on incoming dstreams. More details on how the centroids are updated are provided under the diff --git a/python/pyspark/mllib/feature.py b/python/pyspark/mllib/feature.py index aef91a8ddc1f..c8a6e33f4d9a 100644 --- a/python/pyspark/mllib/feature.py +++ b/python/pyspark/mllib/feature.py @@ -60,8 +60,6 @@ def transform(self, vector): class Normalizer(VectorTransformer): """ - .. note:: Experimental - Normalizes samples individually to unit L\ :sup:`p`\ norm For any 1 <= `p` < float('inf'), normalizes samples using @@ -131,8 +129,6 @@ def transform(self, vector): class StandardScalerModel(JavaVectorTransformer): """ - .. note:: Experimental - Represents a StandardScaler model that can transform vectors. .. versionadded:: 1.2.0 @@ -207,8 +203,6 @@ def mean(self): class StandardScaler(object): """ - .. note:: Experimental - Standardizes features by removing the mean and scaling to unit variance using column summary statistics on the samples in the training set. @@ -262,8 +256,6 @@ def fit(self, dataset): class ChiSqSelectorModel(JavaVectorTransformer): """ - .. note:: Experimental - Represents a Chi Squared selector model. .. versionadded:: 1.4.0 @@ -282,8 +274,6 @@ def transform(self, vector): class ChiSqSelector(object): """ - .. note:: Experimental - Creates a ChiSquared feature selector. :param numTopFeatures: number of features that selector will select. @@ -361,8 +351,6 @@ def fit(self, data): class HashingTF(object): """ - .. note:: Experimental - Maps a sequence of terms to their term frequencies using the hashing trick. @@ -448,8 +436,6 @@ def idf(self): class IDF(object): """ - .. note:: Experimental - Inverse document frequency (IDF). The standard formulation is used: `idf = log((m + 1) / (d(t) + 1))`, @@ -697,8 +683,6 @@ def fit(self, data): class ElementwiseProduct(VectorTransformer): """ - .. note:: Experimental - Scales each column of the vector, with the supplied weight vector. i.e the elementwise product. diff --git a/python/pyspark/mllib/fpm.py b/python/pyspark/mllib/fpm.py index fb226e84e5d5..f58ea5dfb087 100644 --- a/python/pyspark/mllib/fpm.py +++ b/python/pyspark/mllib/fpm.py @@ -31,8 +31,6 @@ @ignore_unicode_prefix class FPGrowthModel(JavaModelWrapper, JavaSaveable, JavaLoader): """ - .. note:: Experimental - A FP-Growth model for mining frequent itemsets using the Parallel FP-Growth algorithm. @@ -70,8 +68,6 @@ def load(cls, sc, path): class FPGrowth(object): """ - .. note:: Experimental - A Parallel FP-growth algorithm to mine frequent itemsets. .. versionadded:: 1.4.0 @@ -108,8 +104,6 @@ class FreqItemset(namedtuple("FreqItemset", ["items", "freq"])): @ignore_unicode_prefix class PrefixSpanModel(JavaModelWrapper): """ - .. 
note:: Experimental - Model fitted by PrefixSpan >>> data = [ @@ -133,8 +127,6 @@ def freqSequences(self): class PrefixSpan(object): """ - .. note:: Experimental - A parallel PrefixSpan algorithm to mine frequent sequential patterns. The PrefixSpan algorithm is described in J. Pei, et al., PrefixSpan: Mining Sequential Patterns Efficiently by Prefix-Projected Pattern Growth diff --git a/python/pyspark/mllib/linalg/__init__.py b/python/pyspark/mllib/linalg/__init__.py index 15dc53a959d6..9672dbde823f 100644 --- a/python/pyspark/mllib/linalg/__init__.py +++ b/python/pyspark/mllib/linalg/__init__.py @@ -1338,8 +1338,6 @@ def fromML(mat): class QRDecomposition(object): """ - .. note:: Experimental - Represents QR factors. """ def __init__(self, Q, R): diff --git a/python/pyspark/mllib/linalg/distributed.py b/python/pyspark/mllib/linalg/distributed.py index ea4f27cf4ffe..538cada7d163 100644 --- a/python/pyspark/mllib/linalg/distributed.py +++ b/python/pyspark/mllib/linalg/distributed.py @@ -40,8 +40,6 @@ class DistributedMatrix(object): """ - .. note:: Experimental - Represents a distributively stored matrix backed by one or more RDDs. @@ -57,8 +55,6 @@ def numCols(self): class RowMatrix(DistributedMatrix): """ - .. note:: Experimental - Represents a row-oriented distributed Matrix with no meaningful row indices. @@ -306,8 +302,6 @@ def tallSkinnyQR(self, computeQ=False): class IndexedRow(object): """ - .. note:: Experimental - Represents a row of an IndexedRowMatrix. Just a wrapper over a (long, vector) tuple. @@ -334,8 +328,6 @@ def _convert_to_indexed_row(row): class IndexedRowMatrix(DistributedMatrix): """ - .. note:: Experimental - Represents a row-oriented distributed Matrix with indexed rows. :param rows: An RDD of IndexedRows or (long, vector) tuples. @@ -536,8 +528,6 @@ def toBlockMatrix(self, rowsPerBlock=1024, colsPerBlock=1024): class MatrixEntry(object): """ - .. note:: Experimental - Represents an entry of a CoordinateMatrix. Just a wrapper over a (long, long, float) tuple. @@ -566,8 +556,6 @@ def _convert_to_matrix_entry(entry): class CoordinateMatrix(DistributedMatrix): """ - .. note:: Experimental - Represents a matrix in coordinate format. :param entries: An RDD of MatrixEntry inputs or @@ -795,8 +783,6 @@ def _convert_to_matrix_block_tuple(block): class BlockMatrix(DistributedMatrix): """ - .. note:: Experimental - Represents a distributed matrix in blocks of local matrices. :param blocks: An RDD of sub-matrix blocks diff --git a/python/pyspark/mllib/stat/KernelDensity.py b/python/pyspark/mllib/stat/KernelDensity.py index 7da921976d4d..3b1c5519bd87 100644 --- a/python/pyspark/mllib/stat/KernelDensity.py +++ b/python/pyspark/mllib/stat/KernelDensity.py @@ -28,8 +28,6 @@ class KernelDensity(object): """ - .. note:: Experimental - Estimate probability density at required points given a RDD of samples from the population. diff --git a/python/pyspark/mllib/stat/_statistics.py b/python/pyspark/mllib/stat/_statistics.py index b0a85240b289..67d5f0e44f41 100644 --- a/python/pyspark/mllib/stat/_statistics.py +++ b/python/pyspark/mllib/stat/_statistics.py @@ -160,8 +160,6 @@ def corr(x, y=None, method=None): @ignore_unicode_prefix def chiSqTest(observed, expected=None): """ - .. 
note:: Experimental - If `observed` is Vector, conduct Pearson's chi-squared goodness of fit test of the observed data against the expected distribution, or against the uniform distribution (by default), with each category @@ -246,8 +244,6 @@ def chiSqTest(observed, expected=None): @ignore_unicode_prefix def kolmogorovSmirnovTest(data, distName="norm", *params): """ - .. note:: Experimental - Performs the Kolmogorov-Smirnov (KS) test for data sampled from a continuous distribution. It tests the null hypothesis that the data is generated from a particular distribution. diff --git a/python/pyspark/mllib/tree.py b/python/pyspark/mllib/tree.py index 8be76fcefe54..b3011d42e56a 100644 --- a/python/pyspark/mllib/tree.py +++ b/python/pyspark/mllib/tree.py @@ -76,8 +76,6 @@ def toDebugString(self): class DecisionTreeModel(JavaModelWrapper, JavaSaveable, JavaLoader): """ - .. note:: Experimental - A decision tree model for classification or regression. .. versionadded:: 1.1.0 @@ -130,8 +128,6 @@ def _java_loader_class(cls): class DecisionTree(object): """ - .. note:: Experimental - Learning algorithm for a decision tree model for classification or regression. @@ -283,8 +279,6 @@ def trainRegressor(cls, data, categoricalFeaturesInfo, @inherit_doc class RandomForestModel(TreeEnsembleModel, JavaLoader): """ - .. note:: Experimental - Represents a random forest model. .. versionadded:: 1.2.0 @@ -297,8 +291,6 @@ def _java_loader_class(cls): class RandomForest(object): """ - .. note:: Experimental - Learning algorithm for a random forest model for classification or regression. @@ -486,8 +478,6 @@ def trainRegressor(cls, data, categoricalFeaturesInfo, numTrees, featureSubsetSt @inherit_doc class GradientBoostedTreesModel(TreeEnsembleModel, JavaLoader): """ - .. note:: Experimental - Represents a gradient-boosted tree model. .. versionadded:: 1.3.0 @@ -500,8 +490,6 @@ def _java_loader_class(cls): class GradientBoostedTrees(object): """ - .. note:: Experimental - Learning algorithm for a gradient boosted trees model for classification or regression.
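Closing out with the RDD-based tree APIs in this file: a minimal `GradientBoostedTrees` sketch on the Scala side, with `sc` an assumed SparkContext and the two-point training set purely illustrative:

```scala
import org.apache.spark.mllib.linalg.Vectors
import org.apache.spark.mllib.regression.LabeledPoint
import org.apache.spark.mllib.tree.GradientBoostedTrees
import org.apache.spark.mllib.tree.configuration.BoostingStrategy

val trainingRDD = sc.parallelize(Seq(
  LabeledPoint(0.0, Vectors.dense(0.0, 1.0)),
  LabeledPoint(1.0, Vectors.dense(1.0, 0.0))
))

val boostingStrategy = BoostingStrategy.defaultParams("Regression")
boostingStrategy.numIterations = 10   // number of trees in the ensemble
boostingStrategy.treeStrategy.maxDepth = 3

val gbtModel = GradientBoostedTrees.train(trainingRDD, boostingStrategy)
println(gbtModel.toDebugString)
```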