From d74ced64b6255a953646f44b6767e167ab16b5e3 Mon Sep 17 00:00:00 2001 From: MechCoder Date: Wed, 19 Aug 2015 16:58:49 +0530 Subject: [PATCH 1/4] [SPARK-10108] Add since tags to mllib.feature --- .../spark/mllib/feature/ChiSqSelector.scala | 4 +++ .../mllib/feature/ElementwiseProduct.scala | 2 ++ .../spark/mllib/feature/HashingTF.scala | 9 +++++++ .../org/apache/spark/mllib/feature/IDF.scala | 6 +++++ .../spark/mllib/feature/Normalizer.scala | 5 ++++ .../org/apache/spark/mllib/feature/PCA.scala | 8 +++++- .../spark/mllib/feature/StandardScaler.scala | 19 ++++++++++++++ .../mllib/feature/VectorTransformer.scala | 4 +++ .../apache/spark/mllib/feature/Word2Vec.scala | 25 +++++++++++++++++++ 9 files changed, 81 insertions(+), 1 deletion(-) diff --git a/mllib/src/main/scala/org/apache/spark/mllib/feature/ChiSqSelector.scala b/mllib/src/main/scala/org/apache/spark/mllib/feature/ChiSqSelector.scala index 5f8c1dea237b4..2195a46780ad6 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/feature/ChiSqSelector.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/feature/ChiSqSelector.scala @@ -30,6 +30,7 @@ import org.apache.spark.rdd.RDD * Chi Squared selector model. * * @param selectedFeatures list of indices to select (filter). Must be ordered asc + * @since 1.3.0 */ @Experimental class ChiSqSelectorModel (val selectedFeatures: Array[Int]) extends VectorTransformer { @@ -51,6 +52,7 @@ class ChiSqSelectorModel (val selectedFeatures: Array[Int]) extends VectorTransf * * @param vector vector to be transformed. * @return transformed vector. + * @since 1.3.0 */ override def transform(vector: Vector): Vector = { compress(vector, selectedFeatures) @@ -106,6 +108,7 @@ class ChiSqSelectorModel (val selectedFeatures: Array[Int]) extends VectorTransf * Creates a ChiSquared feature selector. * @param numTopFeatures number of features that selector will select * (ordered by statistic value descending) + * @since 1.3.0 */ @Experimental class ChiSqSelector (val numTopFeatures: Int) extends Serializable { @@ -116,6 +119,7 @@ class ChiSqSelector (val numTopFeatures: Int) extends Serializable { * @param data an `RDD[LabeledPoint]` containing the labeled dataset with categorical features. * Real-valued features will be treated as categorical for each distinct value. * Apply feature discretizer before using this function. + * @since 1.3.0 */ def fit(data: RDD[LabeledPoint]): ChiSqSelectorModel = { val indices = Statistics.chiSqTest(data) diff --git a/mllib/src/main/scala/org/apache/spark/mllib/feature/ElementwiseProduct.scala b/mllib/src/main/scala/org/apache/spark/mllib/feature/ElementwiseProduct.scala index d67fe6c3ee4f8..1b49092abcb74 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/feature/ElementwiseProduct.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/feature/ElementwiseProduct.scala @@ -26,6 +26,7 @@ import org.apache.spark.mllib.linalg._ * provided "weight" vector. In other words, it scales each column of the dataset by a scalar * multiplier. * @param scalingVec The values used to scale the reference vector's individual components. + * @since 1.4.0 */ @Experimental class ElementwiseProduct(val scalingVec: Vector) extends VectorTransformer { @@ -35,6 +36,7 @@ class ElementwiseProduct(val scalingVec: Vector) extends VectorTransformer { * * @param vector vector to be transformed. * @return transformed vector. 
+ * @since 1.4.0 */ override def transform(vector: Vector): Vector = { require(vector.size == scalingVec.size, diff --git a/mllib/src/main/scala/org/apache/spark/mllib/feature/HashingTF.scala b/mllib/src/main/scala/org/apache/spark/mllib/feature/HashingTF.scala index c53475818395f..8bc439563bf43 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/feature/HashingTF.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/feature/HashingTF.scala @@ -33,19 +33,25 @@ import org.apache.spark.util.Utils * Maps a sequence of terms to their term frequencies using the hashing trick. * * @param numFeatures number of features (default: 2^20^) + * @since 1.1.0 */ @Experimental class HashingTF(val numFeatures: Int) extends Serializable { + /** + * @since 1.1.0 + */ def this() = this(1 << 20) /** * Returns the index of the input term. + * @since 1.1.0 */ def indexOf(term: Any): Int = Utils.nonNegativeMod(term.##, numFeatures) /** * Transforms the input document into a sparse term frequency vector. + * @since 1.1.0 */ def transform(document: Iterable[_]): Vector = { val termFrequencies = mutable.HashMap.empty[Int, Double] @@ -58,6 +64,7 @@ class HashingTF(val numFeatures: Int) extends Serializable { /** * Transforms the input document into a sparse term frequency vector (Java version). + * @since 1.1.0 */ def transform(document: JavaIterable[_]): Vector = { transform(document.asScala) @@ -65,6 +72,7 @@ class HashingTF(val numFeatures: Int) extends Serializable { /** * Transforms the input document to term frequency vectors. + * @since 1.1.0 */ def transform[D <: Iterable[_]](dataset: RDD[D]): RDD[Vector] = { dataset.map(this.transform) @@ -72,6 +80,7 @@ class HashingTF(val numFeatures: Int) extends Serializable { /** * Transforms the input document to term frequency vectors (Java version). + * @since 1.1.0 */ def transform[D <: JavaIterable[_]](dataset: JavaRDD[D]): JavaRDD[Vector] = { dataset.rdd.map(this.transform).toJavaRDD() diff --git a/mllib/src/main/scala/org/apache/spark/mllib/feature/IDF.scala b/mllib/src/main/scala/org/apache/spark/mllib/feature/IDF.scala index 3fab7ea79befc..b6d72d6e50866 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/feature/IDF.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/feature/IDF.scala @@ -36,6 +36,7 @@ import org.apache.spark.rdd.RDD * * @param minDocFreq minimum of documents in which a term * should appear for filtering + * @since 1.1.0 */ @Experimental class IDF(val minDocFreq: Int) { @@ -47,6 +48,7 @@ class IDF(val minDocFreq: Int) { /** * Computes the inverse document frequency. * @param dataset an RDD of term frequency vectors + * @since 1.1.0 */ def fit(dataset: RDD[Vector]): IDFModel = { val idf = dataset.treeAggregate(new IDF.DocumentFrequencyAggregator( @@ -60,6 +62,7 @@ class IDF(val minDocFreq: Int) { /** * Computes the inverse document frequency. 
* @param dataset a JavaRDD of term frequency vectors + * @since 1.1.0 */ def fit(dataset: JavaRDD[Vector]): IDFModel = { fit(dataset.rdd) @@ -170,6 +173,7 @@ class IDFModel private[spark] (val idf: Vector) extends Serializable { * * @param dataset an RDD of term frequency vectors * @return an RDD of TF-IDF vectors + * @since 1.1.0 */ def transform(dataset: RDD[Vector]): RDD[Vector] = { val bcIdf = dataset.context.broadcast(idf) @@ -181,6 +185,7 @@ class IDFModel private[spark] (val idf: Vector) extends Serializable { * * @param v a term frequency vector * @return a TF-IDF vector + * @since 1.3.0 */ def transform(v: Vector): Vector = IDFModel.transform(idf, v) @@ -188,6 +193,7 @@ class IDFModel private[spark] (val idf: Vector) extends Serializable { * Transforms term frequency (TF) vectors to TF-IDF vectors (Java version). * @param dataset a JavaRDD of term frequency vectors * @return a JavaRDD of TF-IDF vectors + * @since 1.1.0 */ def transform(dataset: JavaRDD[Vector]): JavaRDD[Vector] = { transform(dataset.rdd).toJavaRDD() diff --git a/mllib/src/main/scala/org/apache/spark/mllib/feature/Normalizer.scala b/mllib/src/main/scala/org/apache/spark/mllib/feature/Normalizer.scala index 32848e039eb81..1516af28a0b19 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/feature/Normalizer.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/feature/Normalizer.scala @@ -30,10 +30,14 @@ import org.apache.spark.mllib.linalg.{DenseVector, SparseVector, Vector, Vectors * For p = Double.PositiveInfinity, max(abs(vector)) will be used as norm for normalization. * * @param p Normalization in L^p^ space, p = 2 by default. + * @since 1.1.0 */ @Experimental class Normalizer(p: Double) extends VectorTransformer { + /** + * @since 1.1.0 + */ def this() = this(2) require(p >= 1.0) @@ -43,6 +47,7 @@ class Normalizer(p: Double) extends VectorTransformer { * * @param vector vector to be normalized. * @return normalized vector. If the norm of the input is zero, it will return the input vector. + * @since 1.1.0 */ override def transform(vector: Vector): Vector = { val norm = Vectors.norm(vector, p) diff --git a/mllib/src/main/scala/org/apache/spark/mllib/feature/PCA.scala b/mllib/src/main/scala/org/apache/spark/mllib/feature/PCA.scala index 2a66263d8b7d6..d33cc0b1c2e15 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/feature/PCA.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/feature/PCA.scala @@ -26,6 +26,7 @@ import org.apache.spark.rdd.RDD * A feature transformer that projects vectors to a low-dimensional space using PCA. * * @param k number of principal components + * @since 1.4.0 */ class PCA(val k: Int) { require(k >= 1, s"PCA requires a number of principal components k >= 1 but was given $k") @@ -34,6 +35,7 @@ class PCA(val k: Int) { * Computes a [[PCAModel]] that contains the principal components of the input vectors. * * @param sources source vectors + * @since 1.4.0 */ def fit(sources: RDD[Vector]): PCAModel = { require(k <= sources.first().size, @@ -58,7 +60,10 @@ class PCA(val k: Int) { new PCAModel(k, pc) } - /** Java-friendly version of [[fit()]] */ + /** + * Java-friendly version of [[fit()]] + * @since 1.4.0 + */ def fit(sources: JavaRDD[Vector]): PCAModel = fit(sources.rdd) } @@ -75,6 +80,7 @@ class PCAModel private[spark] (val k: Int, val pc: DenseMatrix) extends VectorTr * @param vector vector to be transformed. * Vector must be the same length as the source vectors given to [[PCA.fit()]]. * @return transformed vector. Vector will be of length k. 
+ * @since 1.4.0 */ override def transform(vector: Vector): Vector = { vector match { diff --git a/mllib/src/main/scala/org/apache/spark/mllib/feature/StandardScaler.scala b/mllib/src/main/scala/org/apache/spark/mllib/feature/StandardScaler.scala index c73b8f258060d..9998602b0c2ac 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/feature/StandardScaler.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/feature/StandardScaler.scala @@ -31,10 +31,14 @@ import org.apache.spark.rdd.RDD * @param withMean False by default. Centers the data with mean before scaling. It will build a * dense output, so this does not work on sparse input and will raise an exception. * @param withStd True by default. Scales the data to unit standard deviation. + * @since 1.1.0 */ @Experimental class StandardScaler(withMean: Boolean, withStd: Boolean) extends Logging { + /** + * @since 1.1.0 + */ def this() = this(false, true) if (!(withMean || withStd)) { @@ -46,6 +50,7 @@ class StandardScaler(withMean: Boolean, withStd: Boolean) extends Logging { * * @param data The data used to compute the mean and variance to build the transformation model. * @return a StandardScalarModel + * @since 1.1.0 */ def fit(data: RDD[Vector]): StandardScalerModel = { // TODO: skip computation if both withMean and withStd are false @@ -68,6 +73,7 @@ class StandardScaler(withMean: Boolean, withStd: Boolean) extends Logging { * @param mean column mean values * @param withStd whether to scale the data to have unit standard deviation * @param withMean whether to center the data before scaling + * @since 1.1.0 */ @Experimental class StandardScalerModel ( @@ -76,6 +82,9 @@ class StandardScalerModel ( var withStd: Boolean, var withMean: Boolean) extends VectorTransformer { + /** + * @since 1.3.0 + */ def this(std: Vector, mean: Vector) { this(std, mean, withStd = std != null, withMean = mean != null) require(this.withStd || this.withMean, @@ -86,8 +95,14 @@ class StandardScalerModel ( } } + /** + * @since 1.3.0 + */ def this(std: Vector) = this(std, null) + /** + * @since 1.3.0 + */ @DeveloperApi def setWithMean(withMean: Boolean): this.type = { require(!(withMean && this.mean == null), "cannot set withMean to true while mean is null") @@ -95,6 +110,9 @@ class StandardScalerModel ( this } + /** + * @since 1.3.0 + */ @DeveloperApi def setWithStd(withStd: Boolean): this.type = { require(!(withStd && this.std == null), @@ -114,6 +132,7 @@ class StandardScalerModel ( * @param vector Vector to be standardized. * @return Standardized vector. If the std of a column is zero, it will return default `0.0` * for the column with zero std. + * @since 1.1.0 */ override def transform(vector: Vector): Vector = { require(mean.size == vector.size) diff --git a/mllib/src/main/scala/org/apache/spark/mllib/feature/VectorTransformer.scala b/mllib/src/main/scala/org/apache/spark/mllib/feature/VectorTransformer.scala index 7358c1c84f79c..9e0218d364972 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/feature/VectorTransformer.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/feature/VectorTransformer.scala @@ -25,6 +25,7 @@ import org.apache.spark.rdd.RDD /** * :: DeveloperApi :: * Trait for transformation of a vector + * @since 1.1.0 */ @DeveloperApi trait VectorTransformer extends Serializable { @@ -34,6 +35,7 @@ trait VectorTransformer extends Serializable { * * @param vector vector to be transformed. * @return transformed vector. 
+ * @since 1.1.0 */ def transform(vector: Vector): Vector @@ -42,6 +44,7 @@ trait VectorTransformer extends Serializable { * * @param data RDD[Vector] to be transformed. * @return transformed RDD[Vector]. + * @since 1.1.0 */ def transform(data: RDD[Vector]): RDD[Vector] = { // Later in #1498 , all RDD objects are sent via broadcasting instead of akka. @@ -54,6 +57,7 @@ trait VectorTransformer extends Serializable { * * @param data JavaRDD[Vector] to be transformed. * @return transformed JavaRDD[Vector]. + * @since 1.1.0 */ def transform(data: JavaRDD[Vector]): JavaRDD[Vector] = { transform(data.rdd) diff --git a/mllib/src/main/scala/org/apache/spark/mllib/feature/Word2Vec.scala b/mllib/src/main/scala/org/apache/spark/mllib/feature/Word2Vec.scala index cbbd2b0c8d060..77e2eceab794a 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/feature/Word2Vec.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/feature/Word2Vec.scala @@ -69,6 +69,7 @@ private case class VocabWord( * Efficient Estimation of Word Representations in Vector Space * and * Distributed Representations of Words and Phrases and their Compositionality. + * @since 1.1.0 */ @Experimental class Word2Vec extends Serializable with Logging { @@ -82,6 +83,7 @@ class Word2Vec extends Serializable with Logging { /** * Sets vector size (default: 100). + * @since 1.1.0 */ def setVectorSize(vectorSize: Int): this.type = { this.vectorSize = vectorSize @@ -90,6 +92,7 @@ class Word2Vec extends Serializable with Logging { /** * Sets initial learning rate (default: 0.025). + * @since 1.1.0 */ def setLearningRate(learningRate: Double): this.type = { this.learningRate = learningRate @@ -98,6 +101,7 @@ class Word2Vec extends Serializable with Logging { /** * Sets number of partitions (default: 1). Use a small number for accuracy. + * @since 1.1.0 */ def setNumPartitions(numPartitions: Int): this.type = { require(numPartitions > 0, s"numPartitions must be greater than 0 but got $numPartitions") @@ -108,6 +112,7 @@ class Word2Vec extends Serializable with Logging { /** * Sets number of iterations (default: 1), which should be smaller than or equal to number of * partitions. + * @since 1.1.0 */ def setNumIterations(numIterations: Int): this.type = { this.numIterations = numIterations @@ -116,6 +121,7 @@ class Word2Vec extends Serializable with Logging { /** * Sets random seed (default: a random long integer). + * @since 1.1.0 */ def setSeed(seed: Long): this.type = { this.seed = seed @@ -125,6 +131,7 @@ class Word2Vec extends Serializable with Logging { /** * Sets minCount, the minimum number of times a token must appear to be included in the word2vec * model's vocabulary (default: 5). + * @since 1.3.0 */ def setMinCount(minCount: Int): this.type = { this.minCount = minCount @@ -262,6 +269,7 @@ class Word2Vec extends Serializable with Logging { * Computes the vector representation of each word in vocabulary. * @param dataset an RDD of words * @return a Word2VecModel + * @since 1.1.0 */ def fit[S <: Iterable[String]](dataset: RDD[S]): Word2VecModel = { @@ -411,6 +419,7 @@ class Word2Vec extends Serializable with Logging { * Computes the vector representation of each word in vocabulary (Java version). 
* @param dataset a JavaRDD of words * @return a Word2VecModel + * @since 1.1.0 */ def fit[S <: JavaIterable[String]](dataset: JavaRDD[S]): Word2VecModel = { fit(dataset.rdd.map(_.asScala)) @@ -454,6 +463,9 @@ class Word2VecModel private[mllib] ( wordVecNorms } + /** + * @since 1.5.0 + */ def this(model: Map[String, Array[Float]]) = { this(Word2VecModel.buildWordIndex(model), Word2VecModel.buildWordVectors(model)) } @@ -469,6 +481,9 @@ class Word2VecModel private[mllib] ( override protected def formatVersion = "1.0" + /** + * @since 1.4.0 + */ def save(sc: SparkContext, path: String): Unit = { Word2VecModel.SaveLoadV1_0.save(sc, path, getVectors) } @@ -477,6 +492,7 @@ class Word2VecModel private[mllib] ( * Transforms a word to its vector representation * @param word a word * @return vector representation of word + * @since 1.1.0 */ def transform(word: String): Vector = { wordIndex.get(word) match { @@ -493,6 +509,7 @@ class Word2VecModel private[mllib] ( * @param word a word * @param num number of synonyms to find * @return array of (word, cosineSimilarity) + * @since 1.1.0 */ def findSynonyms(word: String, num: Int): Array[(String, Double)] = { val vector = transform(word) @@ -504,6 +521,7 @@ class Word2VecModel private[mllib] ( * @param vector vector representation of a word * @param num number of synonyms to find * @return array of (word, cosineSimilarity) + * @since 1.1.0 */ def findSynonyms(vector: Vector, num: Int): Array[(String, Double)] = { require(num > 0, "Number of similar words should > 0") @@ -533,6 +551,7 @@ class Word2VecModel private[mllib] ( /** * Returns a map of words to their vector representations. + * @since 1.2.0 */ def getVectors: Map[String, Array[Float]] = { wordIndex.map { case (word, ind) => @@ -541,6 +560,9 @@ class Word2VecModel private[mllib] ( } } +/** + * @since 1.4.0 + */ @Experimental object Word2VecModel extends Loader[Word2VecModel] { @@ -600,6 +622,9 @@ object Word2VecModel extends Loader[Word2VecModel] { } } + /** + * @since 1.4.0 + */ override def load(sc: SparkContext, path: String): Word2VecModel = { val (loadedClassName, loadedVersion, metadata) = Loader.loadMetadata(sc, path) From 3778da07e7800d0d3412d04684f5ea73c277ac82 Mon Sep 17 00:00:00 2001 From: MechCoder Date: Thu, 20 Aug 2015 01:49:28 +0530 Subject: [PATCH 2/4] replace since tag with Since annotation --- .../spark/mllib/feature/ChiSqSelector.scala | 10 ++--- .../mllib/feature/ElementwiseProduct.scala | 6 +-- .../spark/mllib/feature/HashingTF.scala | 16 ++++---- .../org/apache/spark/mllib/feature/IDF.scala | 14 +++---- .../spark/mllib/feature/Normalizer.scala | 10 ++--- .../org/apache/spark/mllib/feature/PCA.scala | 9 +++-- .../spark/mllib/feature/StandardScaler.scala | 28 +++++--------- .../mllib/feature/VectorTransformer.scala | 8 ++-- .../apache/spark/mllib/feature/Word2Vec.scala | 38 +++++++++---------- 9 files changed, 64 insertions(+), 75 deletions(-) diff --git a/mllib/src/main/scala/org/apache/spark/mllib/feature/ChiSqSelector.scala b/mllib/src/main/scala/org/apache/spark/mllib/feature/ChiSqSelector.scala index 2195a46780ad6..75dd01aa5c71f 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/feature/ChiSqSelector.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/feature/ChiSqSelector.scala @@ -19,7 +19,7 @@ package org.apache.spark.mllib.feature import scala.collection.mutable.ArrayBuilder -import org.apache.spark.annotation.Experimental +import org.apache.spark.annotation.{Experimental, Since} import org.apache.spark.mllib.linalg.{DenseVector, SparseVector, Vector, 
Vectors} import org.apache.spark.mllib.regression.LabeledPoint import org.apache.spark.mllib.stat.Statistics @@ -30,8 +30,8 @@ import org.apache.spark.rdd.RDD * Chi Squared selector model. * * @param selectedFeatures list of indices to select (filter). Must be ordered asc - * @since 1.3.0 */ +@Since("1.3.0") @Experimental class ChiSqSelectorModel (val selectedFeatures: Array[Int]) extends VectorTransformer { @@ -52,8 +52,8 @@ class ChiSqSelectorModel (val selectedFeatures: Array[Int]) extends VectorTransf * * @param vector vector to be transformed. * @return transformed vector. - * @since 1.3.0 */ + @Since("1.3.0") override def transform(vector: Vector): Vector = { compress(vector, selectedFeatures) } @@ -108,8 +108,8 @@ class ChiSqSelectorModel (val selectedFeatures: Array[Int]) extends VectorTransf * Creates a ChiSquared feature selector. * @param numTopFeatures number of features that selector will select * (ordered by statistic value descending) - * @since 1.3.0 */ +@Since("1.3.0") @Experimental class ChiSqSelector (val numTopFeatures: Int) extends Serializable { @@ -119,8 +119,8 @@ class ChiSqSelector (val numTopFeatures: Int) extends Serializable { * @param data an `RDD[LabeledPoint]` containing the labeled dataset with categorical features. * Real-valued features will be treated as categorical for each distinct value. * Apply feature discretizer before using this function. - * @since 1.3.0 */ + @Since("1.3.0") def fit(data: RDD[LabeledPoint]): ChiSqSelectorModel = { val indices = Statistics.chiSqTest(data) .zipWithIndex.sortBy { case (res, _) => -res.statistic } diff --git a/mllib/src/main/scala/org/apache/spark/mllib/feature/ElementwiseProduct.scala b/mllib/src/main/scala/org/apache/spark/mllib/feature/ElementwiseProduct.scala index 1b49092abcb74..33e2d17bb472e 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/feature/ElementwiseProduct.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/feature/ElementwiseProduct.scala @@ -17,7 +17,7 @@ package org.apache.spark.mllib.feature -import org.apache.spark.annotation.Experimental +import org.apache.spark.annotation.{Experimental, Since} import org.apache.spark.mllib.linalg._ /** @@ -26,8 +26,8 @@ import org.apache.spark.mllib.linalg._ * provided "weight" vector. In other words, it scales each column of the dataset by a scalar * multiplier. * @param scalingVec The values used to scale the reference vector's individual components. - * @since 1.4.0 */ +@Since("1.4.0") @Experimental class ElementwiseProduct(val scalingVec: Vector) extends VectorTransformer { @@ -36,8 +36,8 @@ class ElementwiseProduct(val scalingVec: Vector) extends VectorTransformer { * * @param vector vector to be transformed. * @return transformed vector. 
- * @since 1.4.0 */ + @Since("1.4.0") override def transform(vector: Vector): Vector = { require(vector.size == scalingVec.size, s"vector sizes do not match: Expected ${scalingVec.size} but found ${vector.size}") diff --git a/mllib/src/main/scala/org/apache/spark/mllib/feature/HashingTF.scala b/mllib/src/main/scala/org/apache/spark/mllib/feature/HashingTF.scala index 8bc439563bf43..e47d524b61623 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/feature/HashingTF.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/feature/HashingTF.scala @@ -22,7 +22,7 @@ import java.lang.{Iterable => JavaIterable} import scala.collection.JavaConverters._ import scala.collection.mutable -import org.apache.spark.annotation.Experimental +import org.apache.spark.annotation.{Experimental, Since} import org.apache.spark.api.java.JavaRDD import org.apache.spark.mllib.linalg.{Vector, Vectors} import org.apache.spark.rdd.RDD @@ -33,26 +33,26 @@ import org.apache.spark.util.Utils * Maps a sequence of terms to their term frequencies using the hashing trick. * * @param numFeatures number of features (default: 2^20^) - * @since 1.1.0 */ +@Since("1.1.0") @Experimental class HashingTF(val numFeatures: Int) extends Serializable { /** - * @since 1.1.0 */ + @Since("1.1.0") def this() = this(1 << 20) /** * Returns the index of the input term. - * @since 1.1.0 */ + @Since("1.1.0") def indexOf(term: Any): Int = Utils.nonNegativeMod(term.##, numFeatures) /** * Transforms the input document into a sparse term frequency vector. - * @since 1.1.0 */ + @Since("1.1.0") def transform(document: Iterable[_]): Vector = { val termFrequencies = mutable.HashMap.empty[Int, Double] document.foreach { term => @@ -64,24 +64,24 @@ class HashingTF(val numFeatures: Int) extends Serializable { /** * Transforms the input document into a sparse term frequency vector (Java version). - * @since 1.1.0 */ + @Since("1.1.0") def transform(document: JavaIterable[_]): Vector = { transform(document.asScala) } /** * Transforms the input document to term frequency vectors. - * @since 1.1.0 */ + @Since("1.1.0") def transform[D <: Iterable[_]](dataset: RDD[D]): RDD[Vector] = { dataset.map(this.transform) } /** * Transforms the input document to term frequency vectors (Java version). - * @since 1.1.0 */ + @Since("1.1.0") def transform[D <: JavaIterable[_]](dataset: JavaRDD[D]): JavaRDD[Vector] = { dataset.rdd.map(this.transform).toJavaRDD() } diff --git a/mllib/src/main/scala/org/apache/spark/mllib/feature/IDF.scala b/mllib/src/main/scala/org/apache/spark/mllib/feature/IDF.scala index b6d72d6e50866..d5353ddd972e0 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/feature/IDF.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/feature/IDF.scala @@ -19,7 +19,7 @@ package org.apache.spark.mllib.feature import breeze.linalg.{DenseVector => BDV} -import org.apache.spark.annotation.Experimental +import org.apache.spark.annotation.{Experimental, Since} import org.apache.spark.api.java.JavaRDD import org.apache.spark.mllib.linalg.{DenseVector, SparseVector, Vector, Vectors} import org.apache.spark.rdd.RDD @@ -36,8 +36,8 @@ import org.apache.spark.rdd.RDD * * @param minDocFreq minimum of documents in which a term * should appear for filtering - * @since 1.1.0 */ +@Since("1.1.0") @Experimental class IDF(val minDocFreq: Int) { @@ -48,8 +48,8 @@ class IDF(val minDocFreq: Int) { /** * Computes the inverse document frequency. 
* @param dataset an RDD of term frequency vectors - * @since 1.1.0 */ + @Since("1.1.0") def fit(dataset: RDD[Vector]): IDFModel = { val idf = dataset.treeAggregate(new IDF.DocumentFrequencyAggregator( minDocFreq = minDocFreq))( @@ -62,8 +62,8 @@ class IDF(val minDocFreq: Int) { /** * Computes the inverse document frequency. * @param dataset a JavaRDD of term frequency vectors - * @since 1.1.0 */ + @Since("1.1.0") def fit(dataset: JavaRDD[Vector]): IDFModel = { fit(dataset.rdd) } @@ -173,8 +173,8 @@ class IDFModel private[spark] (val idf: Vector) extends Serializable { * * @param dataset an RDD of term frequency vectors * @return an RDD of TF-IDF vectors - * @since 1.1.0 */ + @Since("1.1.0") def transform(dataset: RDD[Vector]): RDD[Vector] = { val bcIdf = dataset.context.broadcast(idf) dataset.mapPartitions(iter => iter.map(v => IDFModel.transform(bcIdf.value, v))) @@ -185,16 +185,16 @@ class IDFModel private[spark] (val idf: Vector) extends Serializable { * * @param v a term frequency vector * @return a TF-IDF vector - * @since 1.3.0 */ + @Since("1.3.0") def transform(v: Vector): Vector = IDFModel.transform(idf, v) /** * Transforms term frequency (TF) vectors to TF-IDF vectors (Java version). * @param dataset a JavaRDD of term frequency vectors * @return a JavaRDD of TF-IDF vectors - * @since 1.1.0 */ + @Since("1.1.0") def transform(dataset: JavaRDD[Vector]): JavaRDD[Vector] = { transform(dataset.rdd).toJavaRDD() } diff --git a/mllib/src/main/scala/org/apache/spark/mllib/feature/Normalizer.scala b/mllib/src/main/scala/org/apache/spark/mllib/feature/Normalizer.scala index 1516af28a0b19..0e070257d9fb2 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/feature/Normalizer.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/feature/Normalizer.scala @@ -17,7 +17,7 @@ package org.apache.spark.mllib.feature -import org.apache.spark.annotation.Experimental +import org.apache.spark.annotation.{Experimental, Since} import org.apache.spark.mllib.linalg.{DenseVector, SparseVector, Vector, Vectors} /** @@ -30,14 +30,12 @@ import org.apache.spark.mllib.linalg.{DenseVector, SparseVector, Vector, Vectors * For p = Double.PositiveInfinity, max(abs(vector)) will be used as norm for normalization. * * @param p Normalization in L^p^ space, p = 2 by default. - * @since 1.1.0 */ +@Since("1.1.0") @Experimental class Normalizer(p: Double) extends VectorTransformer { - /** - * @since 1.1.0 - */ + @Since("1.1.0") def this() = this(2) require(p >= 1.0) @@ -47,8 +45,8 @@ class Normalizer(p: Double) extends VectorTransformer { * * @param vector vector to be normalized. * @return normalized vector. If the norm of the input is zero, it will return the input vector. - * @since 1.1.0 */ + @Since("1.1.0") override def transform(vector: Vector): Vector = { val norm = Vectors.norm(vector, p) diff --git a/mllib/src/main/scala/org/apache/spark/mllib/feature/PCA.scala b/mllib/src/main/scala/org/apache/spark/mllib/feature/PCA.scala index d33cc0b1c2e15..d6c70a3aad42f 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/feature/PCA.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/feature/PCA.scala @@ -17,6 +17,7 @@ package org.apache.spark.mllib.feature +import org.apache.spark.annotation.Experimental import org.apache.spark.api.java.JavaRDD import org.apache.spark.mllib.linalg._ import org.apache.spark.mllib.linalg.distributed.RowMatrix @@ -26,8 +27,8 @@ import org.apache.spark.rdd.RDD * A feature transformer that projects vectors to a low-dimensional space using PCA. 
* * @param k number of principal components - * @since 1.4.0 */ +@Since("1.4.0") class PCA(val k: Int) { require(k >= 1, s"PCA requires a number of principal components k >= 1 but was given $k") @@ -35,8 +36,8 @@ class PCA(val k: Int) { * Computes a [[PCAModel]] that contains the principal components of the input vectors. * * @param sources source vectors - * @since 1.4.0 */ + @Since("1.4.0") def fit(sources: RDD[Vector]): PCAModel = { require(k <= sources.first().size, s"source vector size is ${sources.first().size} must be greater than k=$k") @@ -62,8 +63,8 @@ class PCA(val k: Int) { /** * Java-friendly version of [[fit()]] - * @since 1.4.0 */ + @Since("1.4.0") def fit(sources: JavaRDD[Vector]): PCAModel = fit(sources.rdd) } @@ -80,8 +81,8 @@ class PCAModel private[spark] (val k: Int, val pc: DenseMatrix) extends VectorTr * @param vector vector to be transformed. * Vector must be the same length as the source vectors given to [[PCA.fit()]]. * @return transformed vector. Vector will be of length k. - * @since 1.4.0 */ + @Since("1.4.0") override def transform(vector: Vector): Vector = { vector match { case dv: DenseVector => diff --git a/mllib/src/main/scala/org/apache/spark/mllib/feature/StandardScaler.scala b/mllib/src/main/scala/org/apache/spark/mllib/feature/StandardScaler.scala index 9998602b0c2ac..b95d5a899001e 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/feature/StandardScaler.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/feature/StandardScaler.scala @@ -18,7 +18,7 @@ package org.apache.spark.mllib.feature import org.apache.spark.Logging -import org.apache.spark.annotation.{DeveloperApi, Experimental} +import org.apache.spark.annotation.{DeveloperApi, Experimental, Since} import org.apache.spark.mllib.linalg.{DenseVector, SparseVector, Vector, Vectors} import org.apache.spark.mllib.stat.MultivariateOnlineSummarizer import org.apache.spark.rdd.RDD @@ -31,14 +31,12 @@ import org.apache.spark.rdd.RDD * @param withMean False by default. Centers the data with mean before scaling. It will build a * dense output, so this does not work on sparse input and will raise an exception. * @param withStd True by default. Scales the data to unit standard deviation. - * @since 1.1.0 */ +@Since("1.1.0") @Experimental class StandardScaler(withMean: Boolean, withStd: Boolean) extends Logging { - /** - * @since 1.1.0 - */ + @Since("1.1.0") def this() = this(false, true) if (!(withMean || withStd)) { @@ -50,8 +48,8 @@ class StandardScaler(withMean: Boolean, withStd: Boolean) extends Logging { * * @param data The data used to compute the mean and variance to build the transformation model. 
* @return a StandardScalarModel - * @since 1.1.0 */ + @Since("1.1.0") def fit(data: RDD[Vector]): StandardScalerModel = { // TODO: skip computation if both withMean and withStd are false val summary = data.treeAggregate(new MultivariateOnlineSummarizer)( @@ -73,8 +71,8 @@ class StandardScaler(withMean: Boolean, withStd: Boolean) extends Logging { * @param mean column mean values * @param withStd whether to scale the data to have unit standard deviation * @param withMean whether to center the data before scaling - * @since 1.1.0 */ +@Since("1.1.0") @Experimental class StandardScalerModel ( val std: Vector, @@ -83,8 +81,8 @@ class StandardScalerModel ( var withMean: Boolean) extends VectorTransformer { /** - * @since 1.3.0 */ + @Since("1.3.0") def this(std: Vector, mean: Vector) { this(std, mean, withStd = std != null, withMean = mean != null) require(this.withStd || this.withMean, @@ -95,14 +93,10 @@ class StandardScalerModel ( } } - /** - * @since 1.3.0 - */ + @Since("1.3.0") def this(std: Vector) = this(std, null) - /** - * @since 1.3.0 - */ + @Since("1.3.0") @DeveloperApi def setWithMean(withMean: Boolean): this.type = { require(!(withMean && this.mean == null), "cannot set withMean to true while mean is null") @@ -110,9 +104,7 @@ class StandardScalerModel ( this } - /** - * @since 1.3.0 - */ + @Since("1.3.0") @DeveloperApi def setWithStd(withStd: Boolean): this.type = { require(!(withStd && this.std == null), @@ -132,8 +124,8 @@ class StandardScalerModel ( * @param vector Vector to be standardized. * @return Standardized vector. If the std of a column is zero, it will return default `0.0` * for the column with zero std. - * @since 1.1.0 */ + @Since("1.1.0") override def transform(vector: Vector): Vector = { require(mean.size == vector.size) if (withMean) { diff --git a/mllib/src/main/scala/org/apache/spark/mllib/feature/VectorTransformer.scala b/mllib/src/main/scala/org/apache/spark/mllib/feature/VectorTransformer.scala index 9e0218d364972..cd8a05c961618 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/feature/VectorTransformer.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/feature/VectorTransformer.scala @@ -25,8 +25,8 @@ import org.apache.spark.rdd.RDD /** * :: DeveloperApi :: * Trait for transformation of a vector - * @since 1.1.0 */ +@Since("1.1.0") @DeveloperApi trait VectorTransformer extends Serializable { @@ -35,8 +35,8 @@ trait VectorTransformer extends Serializable { * * @param vector vector to be transformed. * @return transformed vector. - * @since 1.1.0 */ + @Since("1.1.0") def transform(vector: Vector): Vector /** @@ -44,8 +44,8 @@ trait VectorTransformer extends Serializable { * * @param data RDD[Vector] to be transformed. * @return transformed RDD[Vector]. - * @since 1.1.0 */ + @Since("1.1.0") def transform(data: RDD[Vector]): RDD[Vector] = { // Later in #1498 , all RDD objects are sent via broadcasting instead of akka. // So it should be no longer necessary to explicitly broadcast `this` object. @@ -57,8 +57,8 @@ trait VectorTransformer extends Serializable { * * @param data JavaRDD[Vector] to be transformed. * @return transformed JavaRDD[Vector]. 
- * @since 1.1.0 */ + @Since("1.1.0") def transform(data: JavaRDD[Vector]): JavaRDD[Vector] = { transform(data.rdd) } diff --git a/mllib/src/main/scala/org/apache/spark/mllib/feature/Word2Vec.scala b/mllib/src/main/scala/org/apache/spark/mllib/feature/Word2Vec.scala index 77e2eceab794a..540603454569b 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/feature/Word2Vec.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/feature/Word2Vec.scala @@ -32,7 +32,7 @@ import org.json4s.jackson.JsonMethods._ import org.apache.spark.Logging import org.apache.spark.SparkContext import org.apache.spark.SparkContext._ -import org.apache.spark.annotation.Experimental +import org.apache.spark.annotation.{Experimental, Since} import org.apache.spark.api.java.JavaRDD import org.apache.spark.mllib.linalg.{Vector, Vectors, DenseMatrix, BLAS, DenseVector} import org.apache.spark.mllib.util.{Loader, Saveable} @@ -69,8 +69,8 @@ private case class VocabWord( * Efficient Estimation of Word Representations in Vector Space * and * Distributed Representations of Words and Phrases and their Compositionality. - * @since 1.1.0 */ +@Since("1.1.0") @Experimental class Word2Vec extends Serializable with Logging { @@ -83,8 +83,8 @@ class Word2Vec extends Serializable with Logging { /** * Sets vector size (default: 100). - * @since 1.1.0 */ + @Since("1.1.0") def setVectorSize(vectorSize: Int): this.type = { this.vectorSize = vectorSize this @@ -92,8 +92,8 @@ class Word2Vec extends Serializable with Logging { /** * Sets initial learning rate (default: 0.025). - * @since 1.1.0 */ + @Since("1.1.0") def setLearningRate(learningRate: Double): this.type = { this.learningRate = learningRate this @@ -101,8 +101,8 @@ class Word2Vec extends Serializable with Logging { /** * Sets number of partitions (default: 1). Use a small number for accuracy. - * @since 1.1.0 */ + @Since("1.1.0") def setNumPartitions(numPartitions: Int): this.type = { require(numPartitions > 0, s"numPartitions must be greater than 0 but got $numPartitions") this.numPartitions = numPartitions @@ -112,8 +112,8 @@ class Word2Vec extends Serializable with Logging { /** * Sets number of iterations (default: 1), which should be smaller than or equal to number of * partitions. - * @since 1.1.0 */ + @Since("1.1.0") def setNumIterations(numIterations: Int): this.type = { this.numIterations = numIterations this @@ -121,8 +121,8 @@ class Word2Vec extends Serializable with Logging { /** * Sets random seed (default: a random long integer). - * @since 1.1.0 */ + @Since("1.1.0") def setSeed(seed: Long): this.type = { this.seed = seed this @@ -131,8 +131,8 @@ class Word2Vec extends Serializable with Logging { /** * Sets minCount, the minimum number of times a token must appear to be included in the word2vec * model's vocabulary (default: 5). - * @since 1.3.0 */ + @Since("1.3.0") def setMinCount(minCount: Int): this.type = { this.minCount = minCount this @@ -269,8 +269,8 @@ class Word2Vec extends Serializable with Logging { * Computes the vector representation of each word in vocabulary. * @param dataset an RDD of words * @return a Word2VecModel - * @since 1.1.0 */ + @Since("1.1.0") def fit[S <: Iterable[String]](dataset: RDD[S]): Word2VecModel = { val words = dataset.flatMap(x => x) @@ -419,8 +419,8 @@ class Word2Vec extends Serializable with Logging { * Computes the vector representation of each word in vocabulary (Java version). 
* @param dataset a JavaRDD of words * @return a Word2VecModel - * @since 1.1.0 */ + @Since("1.1.0") def fit[S <: JavaIterable[String]](dataset: JavaRDD[S]): Word2VecModel = { fit(dataset.rdd.map(_.asScala)) } @@ -463,9 +463,7 @@ class Word2VecModel private[mllib] ( wordVecNorms } - /** - * @since 1.5.0 - */ + @Since("1.5.0") def this(model: Map[String, Array[Float]]) = { this(Word2VecModel.buildWordIndex(model), Word2VecModel.buildWordVectors(model)) } @@ -482,8 +480,8 @@ class Word2VecModel private[mllib] ( override protected def formatVersion = "1.0" /** - * @since 1.4.0 */ + @Since("1.4.0") def save(sc: SparkContext, path: String): Unit = { Word2VecModel.SaveLoadV1_0.save(sc, path, getVectors) } @@ -492,8 +490,8 @@ class Word2VecModel private[mllib] ( * Transforms a word to its vector representation * @param word a word * @return vector representation of word - * @since 1.1.0 */ + @Since("1.1.0") def transform(word: String): Vector = { wordIndex.get(word) match { case Some(ind) => @@ -509,8 +507,8 @@ class Word2VecModel private[mllib] ( * @param word a word * @param num number of synonyms to find * @return array of (word, cosineSimilarity) - * @since 1.1.0 */ + @Since("1.1.0") def findSynonyms(word: String, num: Int): Array[(String, Double)] = { val vector = transform(word) findSynonyms(vector, num) @@ -521,8 +519,8 @@ class Word2VecModel private[mllib] ( * @param vector vector representation of a word * @param num number of synonyms to find * @return array of (word, cosineSimilarity) - * @since 1.1.0 */ + @Since("1.1.0") def findSynonyms(vector: Vector, num: Int): Array[(String, Double)] = { require(num > 0, "Number of similar words should > 0") // TODO: optimize top-k @@ -551,8 +549,8 @@ class Word2VecModel private[mllib] ( /** * Returns a map of words to their vector representations. 
- * @since 1.2.0 */ + @Since("1.2.0") def getVectors: Map[String, Array[Float]] = { wordIndex.map { case (word, ind) => (word, wordVectors.slice(vectorSize * ind, vectorSize * ind + vectorSize)) @@ -561,8 +559,8 @@ class Word2VecModel private[mllib] ( } /** - * @since 1.4.0 */ +@Since("1.4.0") @Experimental object Word2VecModel extends Loader[Word2VecModel] { @@ -623,8 +621,8 @@ object Word2VecModel extends Loader[Word2VecModel] { } /** - * @since 1.4.0 */ + @Since("1.4.0") override def load(sc: SparkContext, path: String): Word2VecModel = { val (loadedClassName, loadedVersion, metadata) = Loader.loadMetadata(sc, path) From 4e038243415e084cfae22445f94ac701d3de0452 Mon Sep 17 00:00:00 2001 From: MechCoder Date: Thu, 20 Aug 2015 02:12:13 +0530 Subject: [PATCH 3/4] Add missing imports --- .../src/main/scala/org/apache/spark/mllib/feature/PCA.scala | 2 +- .../org/apache/spark/mllib/feature/VectorTransformer.scala | 2 +- .../scala/org/apache/spark/mllib/feature/Word2Vec.scala | 6 ------ 3 files changed, 2 insertions(+), 8 deletions(-) diff --git a/mllib/src/main/scala/org/apache/spark/mllib/feature/PCA.scala b/mllib/src/main/scala/org/apache/spark/mllib/feature/PCA.scala index d6c70a3aad42f..a48b7bba665d7 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/feature/PCA.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/feature/PCA.scala @@ -17,7 +17,7 @@ package org.apache.spark.mllib.feature -import org.apache.spark.annotation.Experimental +import org.apache.spark.annotation.{Experimental, Since} import org.apache.spark.api.java.JavaRDD import org.apache.spark.mllib.linalg._ import org.apache.spark.mllib.linalg.distributed.RowMatrix diff --git a/mllib/src/main/scala/org/apache/spark/mllib/feature/VectorTransformer.scala b/mllib/src/main/scala/org/apache/spark/mllib/feature/VectorTransformer.scala index cd8a05c961618..5778fd1d09254 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/feature/VectorTransformer.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/feature/VectorTransformer.scala @@ -17,7 +17,7 @@ package org.apache.spark.mllib.feature -import org.apache.spark.annotation.DeveloperApi +import org.apache.spark.annotation.{DeveloperApi, Since} import org.apache.spark.api.java.JavaRDD import org.apache.spark.mllib.linalg.Vector import org.apache.spark.rdd.RDD diff --git a/mllib/src/main/scala/org/apache/spark/mllib/feature/Word2Vec.scala b/mllib/src/main/scala/org/apache/spark/mllib/feature/Word2Vec.scala index 540603454569b..e6f45ae4b01d5 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/feature/Word2Vec.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/feature/Word2Vec.scala @@ -479,8 +479,6 @@ class Word2VecModel private[mllib] ( override protected def formatVersion = "1.0" - /** - */ @Since("1.4.0") def save(sc: SparkContext, path: String): Unit = { Word2VecModel.SaveLoadV1_0.save(sc, path, getVectors) @@ -558,8 +556,6 @@ class Word2VecModel private[mllib] ( } } -/** - */ @Since("1.4.0") @Experimental object Word2VecModel extends Loader[Word2VecModel] { @@ -620,8 +616,6 @@ object Word2VecModel extends Loader[Word2VecModel] { } } - /** - */ @Since("1.4.0") override def load(sc: SparkContext, path: String): Word2VecModel = { From 4abbcb3c3eeadb83e0477637898ba914c94f51db Mon Sep 17 00:00:00 2001 From: MechCoder Date: Thu, 20 Aug 2015 23:08:31 +0530 Subject: [PATCH 4/4] update since for class variables --- .../org/apache/spark/mllib/feature/ChiSqSelector.scala | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git 
a/mllib/src/main/scala/org/apache/spark/mllib/feature/ChiSqSelector.scala b/mllib/src/main/scala/org/apache/spark/mllib/feature/ChiSqSelector.scala index 75dd01aa5c71f..fdd974d7a391e 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/feature/ChiSqSelector.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/feature/ChiSqSelector.scala @@ -33,7 +33,8 @@ import org.apache.spark.rdd.RDD */ @Since("1.3.0") @Experimental -class ChiSqSelectorModel (val selectedFeatures: Array[Int]) extends VectorTransformer { +class ChiSqSelectorModel ( + @Since("1.3.0") val selectedFeatures: Array[Int]) extends VectorTransformer { require(isSorted(selectedFeatures), "Array has to be sorted asc") @@ -111,7 +112,8 @@ class ChiSqSelectorModel (val selectedFeatures: Array[Int]) extends VectorTransf */ @Since("1.3.0") @Experimental -class ChiSqSelector (val numTopFeatures: Int) extends Serializable { +class ChiSqSelector ( + @Since("1.3.0") val numTopFeatures: Int) extends Serializable { /** * Returns a ChiSquared feature selector.
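The series above only adds visibility annotations, so the public API surface is unchanged. As a quick cross-check of the @Since("1.1.0") entries on the feature transformers touched here, the following is a minimal, self-contained usage sketch; it is not part of the patch, and the driver object name, app name, and sample corpus are illustrative assumptions.

import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.mllib.feature.{HashingTF, IDF, Normalizer, StandardScaler}
import org.apache.spark.mllib.linalg.Vector
import org.apache.spark.rdd.RDD

// Hypothetical driver object; only the mllib.feature calls matter here.
object FeatureSinceSketch {
  def main(args: Array[String]): Unit = {
    val sc = new SparkContext(
      new SparkConf().setAppName("mllib-feature-since-sketch").setMaster("local[2]"))

    // Term frequencies via the hashing trick (HashingTF, since 1.1.0; default 2^20 features).
    val documents: RDD[Seq[String]] = sc.parallelize(Seq(
      Seq("spark", "mllib", "feature"),
      Seq("hashing", "trick", "feature")))
    val tf: RDD[Vector] = new HashingTF().transform(documents)
    tf.cache()

    // Inverse document frequencies (IDF.fit and IDFModel.transform, since 1.1.0).
    val tfidf: RDD[Vector] = new IDF(minDocFreq = 1).fit(tf).transform(tf)

    // Unit L^2 normalization (Normalizer.transform, since 1.1.0).
    val normalized: RDD[Vector] = new Normalizer().transform(tfidf)

    // Unit-variance scaling; withMean stays false because the TF-IDF vectors are sparse
    // (StandardScaler.fit and StandardScalerModel.transform, since 1.1.0).
    val scaler = new StandardScaler(withMean = false, withStd = true).fit(tfidf)
    val scaled: RDD[Vector] = scaler.transform(tfidf)

    println(s"normalized: ${normalized.count()} vectors, scaled: ${scaled.count()} vectors")
    sc.stop()
  }
}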