@@ -25,7 +25,7 @@ import org.apache.spark.mllib.regression.{IsotonicRegression, IsotonicRegression
 
 object IsotonicRegressionExample {
 
-  def main(args: Array[String]) : Unit = {
+  def main(args: Array[String]): Unit = {
 
     val conf = new SparkConf().setAppName("IsotonicRegressionExample")
     val sc = new SparkContext(conf)
@@ -27,7 +27,7 @@ import org.apache.spark.mllib.regression.LabeledPoint
 
 object NaiveBayesExample {
 
-  def main(args: Array[String]) : Unit = {
+  def main(args: Array[String]): Unit = {
     val conf = new SparkConf().setAppName("NaiveBayesExample")
     val sc = new SparkContext(conf)
     // $example on$
@@ -27,7 +27,7 @@ import org.apache.spark.mllib.util.MLUtils
 import org.apache.spark.sql.SQLContext
 
 object RegressionMetricsExample {
-  def main(args: Array[String]) : Unit = {
+  def main(args: Array[String]): Unit = {
     val conf = new SparkConf().setAppName("RegressionMetricsExample")
     val sc = new SparkContext(conf)
     val sqlContext = new SQLContext(sc)
@@ -113,13 +113,13 @@ final class OneVsRestModel private[ml] (
         val updateUDF = udf { (predictions: Map[Int, Double], prediction: Vector) =>
           predictions + ((index, prediction(1)))
         }
-        val transformedDataset = model.transform(df).select(columns : _*)
+        val transformedDataset = model.transform(df).select(columns: _*)
         val updatedDataset = transformedDataset
           .withColumn(tmpColName, updateUDF(col(accColName), col(rawPredictionCol)))
         val newColumns = origCols ++ List(col(tmpColName))
 
         // switch out the intermediate column with the accumulator column
-        updatedDataset.select(newColumns : _*).withColumnRenamed(tmpColName, accColName)
+        updatedDataset.select(newColumns: _*).withColumnRenamed(tmpColName, accColName)
     }
 
     if (handlePersistence) {
@@ -238,7 +238,7 @@ private class ColumnPruner(columnsToPrune: Set[String]) extends Transformer {
 
   override def transform(dataset: DataFrame): DataFrame = {
     val columnsToKeep = dataset.columns.filter(!columnsToPrune.contains(_))
-    dataset.select(columnsToKeep.map(dataset.col) : _*)
+    dataset.select(columnsToKeep.map(dataset.col): _*)
   }
 
   override def transformSchema(schema: StructType): StructType = {
@@ -102,7 +102,7 @@ class VectorAssembler(override val uid: String)
       }
     }
 
-    dataset.select(col("*"), assembleFunc(struct(args : _*)).as($(outputCol), metadata))
+    dataset.select(col("*"), assembleFunc(struct(args: _*)).as($(outputCol), metadata))
   }
 
   override def transformSchema(schema: StructType): StructType = {
@@ -474,7 +474,7 @@ private[ml] object RandomForest extends Logging {
     val nodeToFeatures = getNodeToFeatures(treeToNodeToIndexInfo)
     val nodeToFeaturesBc = input.sparkContext.broadcast(nodeToFeatures)
 
-    val partitionAggregates : RDD[(Int, DTStatsAggregator)] = if (nodeIdCache.nonEmpty) {
+    val partitionAggregates: RDD[(Int, DTStatsAggregator)] = if (nodeIdCache.nonEmpty) {
       input.zip(nodeIdCache.get.nodeIdsForInstances).mapPartitions { points =>
         // Construct a nodeStatsAggregators array to hold node aggregate stats,
         // each node will have a nodeStatsAggregator
@@ -825,7 +825,7 @@ private[ml] object RandomForest extends Logging {
   protected[tree] def findSplits(
       input: RDD[LabeledPoint],
       metadata: DecisionTreeMetadata,
-      seed : Long): Array[Array[Split]] = {
+      seed: Long): Array[Array[Split]] = {
 
     logDebug("isMulticlass = " + metadata.isMulticlass)
 
@@ -177,7 +177,7 @@ object GaussianMixtureModel extends Loader[GaussianMixtureModel] {
   }
 
   @Since("1.4.0")
-  override def load(sc: SparkContext, path: String) : GaussianMixtureModel = {
+  override def load(sc: SparkContext, path: String): GaussianMixtureModel = {
     val (loadedClassName, version, metadata) = Loader.loadMetadata(sc, path)
     implicit val formats = DefaultFormats
     val k = (metadata \ "k").extract[Int]
@@ -134,7 +134,7 @@ object FPGrowthModel extends Loader[FPGrowthModel[_]] {
       loadImpl(freqItemsets, sample)
     }
 
-    def loadImpl[Item : ClassTag](freqItemsets: DataFrame, sample: Item): FPGrowthModel[Item] = {
+    def loadImpl[Item: ClassTag](freqItemsets: DataFrame, sample: Item): FPGrowthModel[Item] = {
       val freqItemsetsRDD = freqItemsets.select("items", "freq").map { x =>
         val items = x.getAs[Seq[Item]](0).toArray
         val freq = x.getLong(1)
@@ -279,7 +279,7 @@ class DenseMatrix @Since("1.3.0") (
   }
 
   override def hashCode: Int = {
-    com.google.common.base.Objects.hashCode(numRows : Integer, numCols: Integer, toArray)
+    com.google.common.base.Objects.hashCode(numRows: Integer, numCols: Integer, toArray)
   }
 
   private[mllib] def toBreeze: BM[Double] = {
@@ -27,9 +27,9 @@ import org.apache.spark.mllib.regression.GeneralizedLinearModel
  * PMML Model Export for GeneralizedLinearModel class with binary ClassificationModel
  */
 private[mllib] class BinaryClassificationPMMLModelExport(
-    model : GeneralizedLinearModel,
-    description : String,
-    normalizationMethod : RegressionNormalizationMethodType,
+    model: GeneralizedLinearModel,
+    description: String,
+    normalizationMethod: RegressionNormalizationMethodType,
     threshold: Double)
   extends PMMLModelExport {
 
@@ -26,14 +26,14 @@ import org.apache.spark.mllib.clustering.KMeansModel
 /**
  * PMML Model Export for KMeansModel class
  */
-private[mllib] class KMeansPMMLModelExport(model : KMeansModel) extends PMMLModelExport{
+private[mllib] class KMeansPMMLModelExport(model: KMeansModel) extends PMMLModelExport{
 
   populateKMeansPMML(model)
 
   /**
    * Export the input KMeansModel model to PMML format.
    */
-  private def populateKMeansPMML(model : KMeansModel): Unit = {
+  private def populateKMeansPMML(model: KMeansModel): Unit = {
     pmml.getHeader.setDescription("k-means clustering")
 
     if (model.clusterCenters.length > 0) {
@@ -600,7 +600,7 @@ object DecisionTree extends Serializable with Logging {
     val nodeToFeatures = getNodeToFeatures(treeToNodeToIndexInfo)
     val nodeToFeaturesBc = input.sparkContext.broadcast(nodeToFeatures)
 
-    val partitionAggregates : RDD[(Int, DTStatsAggregator)] = if (nodeIdCache.nonEmpty) {
+    val partitionAggregates: RDD[(Int, DTStatsAggregator)] = if (nodeIdCache.nonEmpty) {
       input.zip(nodeIdCache.get.nodeIdsForInstances).mapPartitions { points =>
         // Construct a nodeStatsAggregators array to hold node aggregate stats,
         // each node will have a nodeStatsAggregator
@@ -83,7 +83,7 @@ class Node @Since("1.2.0") (
    * @return predicted value
    */
   @Since("1.1.0")
-  def predict(features: Vector) : Double = {
+  def predict(features: Vector): Double = {
     if (isLeaf) {
       predict.predict
     } else {
@@ -175,7 +175,7 @@ object LinearDataGenerator {
       nfeatures: Int,
       eps: Double,
       nparts: Int = 2,
-      intercept: Double = 0.0) : RDD[LabeledPoint] = {
+      intercept: Double = 0.0): RDD[LabeledPoint] = {
     val random = new Random(42)
     // Random values distributed uniformly in [-0.5, 0.5]
     val w = Array.fill(nfeatures)(random.nextDouble() - 0.5)
@@ -45,7 +45,7 @@ object SVMSuite {
       nPoints: Int,
       seed: Int): Seq[LabeledPoint] = {
     val rnd = new Random(seed)
-    val weightsMat = new DoubleMatrix(1, weights.length, weights : _*)
+    val weightsMat = new DoubleMatrix(1, weights.length, weights: _*)
     val x = Array.fill[Array[Double]](nPoints)(
       Array.fill[Double](weights.length)(rnd.nextDouble() * 2.0 - 1.0))
     val y = x.map { xi =>
@@ -27,7 +27,7 @@ import org.apache.spark.util.random.XORShiftRandom
 
 class StreamingTestSuite extends SparkFunSuite with TestSuiteBase {
 
-  override def maxWaitTimeMillis : Int = 30000
+  override def maxWaitTimeMillis: Int = 30000
 
   test("accuracy for null hypothesis using welch t-test") {
     // set parameters