JavaAFTSurvivalRegressionExample.java
@@ -21,23 +21,19 @@
import java.util.Arrays;
import java.util.List;

-import org.apache.spark.SparkConf;
-import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.ml.regression.AFTSurvivalRegression;
import org.apache.spark.ml.regression.AFTSurvivalRegressionModel;
import org.apache.spark.mllib.linalg.*;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.RowFactory;
-import org.apache.spark.sql.SQLContext;
+import org.apache.spark.sql.SparkSession;
import org.apache.spark.sql.types.*;
// $example off$

public class JavaAFTSurvivalRegressionExample {
public static void main(String[] args) {
-SparkConf conf = new SparkConf().setAppName("JavaAFTSurvivalRegressionExample");
-JavaSparkContext jsc = new JavaSparkContext(conf);
-SQLContext jsql = new SQLContext(jsc);
+SparkSession spark = SparkSession.builder().appName("JavaAFTSurvivalRegressionExample").getOrCreate();
@andrewor14 (Contributor, May 4, 2016):

line too long, can you break it into multiple lines (here and other places)

@dongjoon-hyun (Member, Author, May 4, 2016):

Sure! Two or more?

Two Lines

SparkSession spark = SparkSession
  .builder().appName("JavaAFTSurvivalRegressionExample").getOrCreate();

More Lines

SparkSession spark = SparkSession
  .builder()
  .appName("JavaAFTSurvivalRegressionExample")
  .getOrCreate();

@dongjoon-hyun (Member, Author):

Anyway, thank you for fast review! :)

@andrewor14 (Contributor):

more lines looks better to me

@dongjoon-hyun (Member, Author):

Thanks. I'll use that format for all languages.

// $example on$
List<Row> data = Arrays.asList(
@@ -52,7 +48,7 @@ public static void main(String[] args) {
new StructField("censor", DataTypes.DoubleType, false, Metadata.empty()),
new StructField("features", new VectorUDT(), false, Metadata.empty())
});
-Dataset<Row> training = jsql.createDataFrame(data, schema);
+Dataset<Row> training = spark.createDataFrame(data, schema);
double[] quantileProbabilities = new double[]{0.3, 0.6};
AFTSurvivalRegression aft = new AFTSurvivalRegression()
.setQuantileProbabilities(quantileProbabilities)
@@ -66,6 +62,6 @@ public static void main(String[] args) {
model.transform(training).show(false);
// $example off$

-jsc.stop();
+spark.stop();
}
}
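
Every file in this diff applies the same three-way collapse: SparkConf, JavaSparkContext, and SQLContext become a single SparkSession; createDataFrame moves onto the session; and jsc.stop() becomes spark.stop(). Below is a minimal self-contained sketch of the migrated shape, using the multi-line builder format agreed on in the review thread above; the class name and toy schema are illustrative, not part of the diff.

import java.util.Arrays;
import java.util.List;

import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.RowFactory;
import org.apache.spark.sql.SparkSession;
import org.apache.spark.sql.types.DataTypes;
import org.apache.spark.sql.types.Metadata;
import org.apache.spark.sql.types.StructField;
import org.apache.spark.sql.types.StructType;

public class JavaSparkSessionMigrationSketch {  // illustrative name, not from the diff
  public static void main(String[] args) {
    // One builder call replaces the SparkConf/JavaSparkContext/SQLContext trio.
    SparkSession spark = SparkSession
      .builder()
      .appName("JavaSparkSessionMigrationSketch")
      .getOrCreate();

    // createDataFrame now hangs off the session and accepts a plain
    // java.util.List<Row>, so jsc.parallelize(...) is no longer needed.
    List<Row> data = Arrays.asList(
      RowFactory.create(0, 0.1),
      RowFactory.create(1, 0.8));
    StructType schema = new StructType(new StructField[]{
      new StructField("id", DataTypes.IntegerType, false, Metadata.empty()),
      new StructField("feature", DataTypes.DoubleType, false, Metadata.empty())
    });
    Dataset<Row> df = spark.createDataFrame(data, schema);
    df.show();

    // spark.stop() replaces jsc.stop().
    spark.stop();
  }
}
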
JavaALSExample.java
@@ -17,11 +17,9 @@

package org.apache.spark.examples.ml;

-import org.apache.spark.SparkConf;
-import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
-import org.apache.spark.sql.SQLContext;
+import org.apache.spark.sql.SparkSession;

// $example on$
import java.io.Serializable;
@@ -83,18 +81,17 @@ public static Rating parseRating(String str) {
// $example off$

public static void main(String[] args) {
-SparkConf conf = new SparkConf().setAppName("JavaALSExample");
-JavaSparkContext jsc = new JavaSparkContext(conf);
-SQLContext sqlContext = new SQLContext(jsc);
+SparkSession spark = SparkSession.builder().appName("JavaALSExample").getOrCreate();

// $example on$
-JavaRDD<Rating> ratingsRDD = jsc.textFile("data/mllib/als/sample_movielens_ratings.txt")
+JavaRDD<Rating> ratingsRDD = spark
+  .read().text("data/mllib/als/sample_movielens_ratings.txt").javaRDD()
.map(new Function<String, Rating>() {
public Rating call(String str) {
return Rating.parseRating(str);
}
});
-Dataset<Row> ratings = sqlContext.createDataFrame(ratingsRDD, Rating.class);
+Dataset<Row> ratings = spark.createDataFrame(ratingsRDD, Rating.class);
Dataset<Row>[] splits = ratings.randomSplit(new double[]{0.8, 0.2});
Dataset<Row> training = splits[0];
Dataset<Row> test = splits[1];
@@ -121,6 +118,6 @@ public Rating call(String str) {
Double rmse = evaluator.evaluate(predictions);
System.out.println("Root-mean-square error = " + rmse);
// $example off$
-jsc.stop();
+spark.stop();
}
}
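
The ALS file adds a second piece of the pattern: file input goes through the session's reader instead of jsc.textFile(...), then drops back to a JavaRDD so the existing map(...) logic can stay. A sketch of just that read path follows. Note that at the time of this PR read().text(...) yielded a dataset of strings; current Spark spells that read().textFile(...), which is what the sketch assumes. The "::" delimiter matches the example's own parseRating helper.

import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.function.Function;
import org.apache.spark.sql.SparkSession;

public class JavaSessionReadPathSketch {  // illustrative name, not from the diff
  public static void main(String[] args) {
    SparkSession spark = SparkSession
      .builder()
      .appName("JavaSessionReadPathSketch")
      .getOrCreate();

    // Old: jsc.textFile("data/mllib/als/sample_movielens_ratings.txt")
    // New: read through the session, then convert to a JavaRDD so the
    // surrounding RDD-based code is unchanged.
    JavaRDD<String> lines = spark
      .read()
      .textFile("data/mllib/als/sample_movielens_ratings.txt")
      .javaRDD();

    // Split each "::"-delimited rating line, as parseRating does.
    JavaRDD<String[]> fields = lines.map(new Function<String, String[]>() {
      public String[] call(String line) {
        return line.split("::");
      }
    });
    System.out.println("Parsed " + fields.count() + " rating lines");

    spark.stop();
  }
}
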

JavaBinarizerExample.java
@@ -20,10 +20,11 @@
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.sql.Dataset;
-import org.apache.spark.sql.SQLContext;
+import org.apache.spark.sql.SparkSession;

// $example on$
import java.util.Arrays;
+import java.util.List;

import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.ml.feature.Binarizer;
@@ -37,21 +38,19 @@

public class JavaBinarizerExample {
public static void main(String[] args) {
-SparkConf conf = new SparkConf().setAppName("JavaBinarizerExample");
-JavaSparkContext jsc = new JavaSparkContext(conf);
-SQLContext jsql = new SQLContext(jsc);
+SparkSession spark = SparkSession.builder().appName("JavaBinarizerExample").getOrCreate();

// $example on$
-JavaRDD<Row> jrdd = jsc.parallelize(Arrays.asList(
+List<Row> data = Arrays.asList(
RowFactory.create(0, 0.1),
RowFactory.create(1, 0.8),
RowFactory.create(2, 0.2)
-));
+);
StructType schema = new StructType(new StructField[]{
new StructField("label", DataTypes.DoubleType, false, Metadata.empty()),
new StructField("feature", DataTypes.DoubleType, false, Metadata.empty())
});
-Dataset<Row> continuousDataFrame = jsql.createDataFrame(jrdd, schema);
+Dataset<Row> continuousDataFrame = spark.createDataFrame(data, schema);
Binarizer binarizer = new Binarizer()
.setInputCol("feature")
.setOutputCol("binarized_feature")
@@ -63,6 +62,6 @@ public static void main(String[] args) {
System.out.println(binarized_value);
}
// $example off$
-jsc.stop();
+spark.stop();
}
}

JavaBisectingKMeansExample.java
@@ -18,12 +18,10 @@
package org.apache.spark.examples.ml;

import java.util.Arrays;
+import java.util.List;

-import org.apache.spark.SparkConf;
-import org.apache.spark.api.java.JavaRDD;
-import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.sql.RowFactory;
-import org.apache.spark.sql.SQLContext;
+import org.apache.spark.sql.SparkSession;
// $example on$
import org.apache.spark.ml.clustering.BisectingKMeans;
import org.apache.spark.ml.clustering.BisectingKMeansModel;
@@ -44,25 +42,23 @@
public class JavaBisectingKMeansExample {

public static void main(String[] args) {
-SparkConf conf = new SparkConf().setAppName("JavaBisectingKMeansExample");
-JavaSparkContext jsc = new JavaSparkContext(conf);
-SQLContext jsql = new SQLContext(jsc);
+SparkSession spark = SparkSession.builder().appName("JavaBisectingKMeansExample").getOrCreate();

// $example on$
-JavaRDD<Row> data = jsc.parallelize(Arrays.asList(
+List<Row> data = Arrays.asList(
RowFactory.create(Vectors.dense(0.1, 0.1, 0.1)),
RowFactory.create(Vectors.dense(0.3, 0.3, 0.25)),
RowFactory.create(Vectors.dense(0.1, 0.1, -0.1)),
RowFactory.create(Vectors.dense(20.3, 20.1, 19.9)),
RowFactory.create(Vectors.dense(20.2, 20.1, 19.7)),
RowFactory.create(Vectors.dense(18.9, 20.0, 19.7))
-));
+);

StructType schema = new StructType(new StructField[]{
new StructField("features", new VectorUDT(), false, Metadata.empty()),
});

-Dataset<Row> dataset = jsql.createDataFrame(data, schema);
+Dataset<Row> dataset = spark.createDataFrame(data, schema);

BisectingKMeans bkm = new BisectingKMeans().setK(2);
BisectingKMeansModel model = bkm.fit(dataset);
@@ -76,6 +72,6 @@ public static void main(String[] args) {
}
// $example off$

-jsc.stop();
+spark.stop();
}
}
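
The clustering and feature examples all build their demo data the way this file now does: a local List<Row> plus an explicit schema, handed straight to createDataFrame. A compilable sketch of just that construction, using the same pre-2.0 mllib VectorUDT the diff itself imports (class name illustrative):

import java.util.Arrays;
import java.util.List;

import org.apache.spark.mllib.linalg.VectorUDT;
import org.apache.spark.mllib.linalg.Vectors;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.RowFactory;
import org.apache.spark.sql.SparkSession;
import org.apache.spark.sql.types.Metadata;
import org.apache.spark.sql.types.StructField;
import org.apache.spark.sql.types.StructType;

public class JavaLocalListDataFrameSketch {  // illustrative name, not from the diff
  public static void main(String[] args) {
    SparkSession spark = SparkSession
      .builder()
      .appName("JavaLocalListDataFrameSketch")
      .getOrCreate();

    // No JavaSparkContext and no parallelize(): the local list goes
    // directly into createDataFrame with an explicit schema.
    List<Row> data = Arrays.asList(
      RowFactory.create(Vectors.dense(0.1, 0.1, 0.1)),
      RowFactory.create(Vectors.dense(20.3, 20.1, 19.9)));
    StructType schema = new StructType(new StructField[]{
      new StructField("features", new VectorUDT(), false, Metadata.empty())
    });
    Dataset<Row> dataset = spark.createDataFrame(data, schema);
    dataset.show();

    spark.stop();
  }
}
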
JavaBucketizerExample.java
@@ -17,14 +17,12 @@

package org.apache.spark.examples.ml;

-import org.apache.spark.SparkConf;
-import org.apache.spark.api.java.JavaSparkContext;
-import org.apache.spark.sql.SQLContext;
+import org.apache.spark.sql.SparkSession;

// $example on$
import java.util.Arrays;
+import java.util.List;

-import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.ml.feature.Bucketizer;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
@@ -37,23 +35,21 @@

public class JavaBucketizerExample {
public static void main(String[] args) {
-SparkConf conf = new SparkConf().setAppName("JavaBucketizerExample");
-JavaSparkContext jsc = new JavaSparkContext(conf);
-SQLContext jsql = new SQLContext(jsc);
+SparkSession spark = SparkSession.builder().appName("JavaBucketizerExample").getOrCreate();

// $example on$
double[] splits = {Double.NEGATIVE_INFINITY, -0.5, 0.0, 0.5, Double.POSITIVE_INFINITY};

-JavaRDD<Row> data = jsc.parallelize(Arrays.asList(
+List<Row> data = Arrays.asList(
RowFactory.create(-0.5),
RowFactory.create(-0.3),
RowFactory.create(0.0),
RowFactory.create(0.2)
-));
+);
StructType schema = new StructType(new StructField[]{
new StructField("features", DataTypes.DoubleType, false, Metadata.empty())
});
-Dataset<Row> dataFrame = jsql.createDataFrame(data, schema);
+Dataset<Row> dataFrame = spark.createDataFrame(data, schema);

Bucketizer bucketizer = new Bucketizer()
.setInputCol("features")
@@ -64,7 +60,7 @@ public static void main(String[] args) {
Dataset<Row> bucketedData = bucketizer.transform(dataFrame);
bucketedData.show();
// $example off$
-jsc.stop();
+spark.stop();
}
}

JavaChiSqSelectorExample.java
@@ -21,10 +21,11 @@
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.sql.Dataset;
-import org.apache.spark.sql.SQLContext;
+import org.apache.spark.sql.SparkSession;

// $example on$
import java.util.Arrays;
+import java.util.List;

import org.apache.spark.ml.feature.ChiSqSelector;
import org.apache.spark.mllib.linalg.VectorUDT;
@@ -39,23 +40,21 @@

public class JavaChiSqSelectorExample {
public static void main(String[] args) {
-SparkConf conf = new SparkConf().setAppName("JavaChiSqSelectorExample");
-JavaSparkContext jsc = new JavaSparkContext(conf);
-SQLContext sqlContext = new SQLContext(jsc);
+SparkSession spark = SparkSession.builder().appName("JavaChiSqSelectorExample").getOrCreate();

// $example on$
-JavaRDD<Row> jrdd = jsc.parallelize(Arrays.asList(
+List<Row> data = Arrays.asList(
RowFactory.create(7, Vectors.dense(0.0, 0.0, 18.0, 1.0), 1.0),
RowFactory.create(8, Vectors.dense(0.0, 1.0, 12.0, 0.0), 0.0),
RowFactory.create(9, Vectors.dense(1.0, 0.0, 15.0, 0.1), 0.0)
-));
+);
StructType schema = new StructType(new StructField[]{
new StructField("id", DataTypes.IntegerType, false, Metadata.empty()),
new StructField("features", new VectorUDT(), false, Metadata.empty()),
new StructField("clicked", DataTypes.DoubleType, false, Metadata.empty())
});

-Dataset<Row> df = sqlContext.createDataFrame(jrdd, schema);
+Dataset<Row> df = spark.createDataFrame(data, schema);

ChiSqSelector selector = new ChiSqSelector()
.setNumTopFeatures(1)
@@ -66,6 +65,6 @@ public static void main(String[] args) {
Dataset<Row> result = selector.fit(df).transform(df);
result.show();
// $example off$
-jsc.stop();
+spark.stop();
}
}

JavaCountVectorizerExample.java
@@ -19,36 +19,31 @@

// $example on$
import java.util.Arrays;
+import java.util.List;

-import org.apache.spark.SparkConf;
-import org.apache.spark.api.java.JavaRDD;
-import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.ml.feature.CountVectorizer;
import org.apache.spark.ml.feature.CountVectorizerModel;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.RowFactory;
-import org.apache.spark.sql.SQLContext;
+import org.apache.spark.sql.SparkSession;
import org.apache.spark.sql.types.*;
// $example off$

public class JavaCountVectorizerExample {
public static void main(String[] args) {

-SparkConf conf = new SparkConf().setAppName("JavaCountVectorizerExample");
-JavaSparkContext jsc = new JavaSparkContext(conf);
-SQLContext sqlContext = new SQLContext(jsc);
+SparkSession spark = SparkSession.builder().appName("JavaCountVectorizerExample").getOrCreate();

// $example on$
// Input data: Each row is a bag of words from a sentence or document.
-JavaRDD<Row> jrdd = jsc.parallelize(Arrays.asList(
+List<Row> data = Arrays.asList(
RowFactory.create(Arrays.asList("a", "b", "c")),
RowFactory.create(Arrays.asList("a", "b", "b", "c", "a"))
-));
+);
StructType schema = new StructType(new StructField [] {
new StructField("text", new ArrayType(DataTypes.StringType, true), false, Metadata.empty())
});
-Dataset<Row> df = sqlContext.createDataFrame(jrdd, schema);
+Dataset<Row> df = spark.createDataFrame(data, schema);

// fit a CountVectorizerModel from the corpus
CountVectorizerModel cvModel = new CountVectorizer()
@@ -66,6 +61,6 @@ public static void main(String[] args) {
cvModel.transform(df).show();
// $example off$

-jsc.stop();
+spark.stop();
}
}
JavaDCTExample.java
@@ -20,10 +20,11 @@
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.sql.Dataset;
-import org.apache.spark.sql.SQLContext;
+import org.apache.spark.sql.SparkSession;

// $example on$
import java.util.Arrays;
+import java.util.List;

import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.ml.feature.DCT;
@@ -38,28 +39,26 @@

public class JavaDCTExample {
public static void main(String[] args) {
-SparkConf conf = new SparkConf().setAppName("JavaDCTExample");
-JavaSparkContext jsc = new JavaSparkContext(conf);
-SQLContext jsql = new SQLContext(jsc);
+SparkSession spark = SparkSession.builder().appName("JavaDCTExample").getOrCreate();

// $example on$
-JavaRDD<Row> data = jsc.parallelize(Arrays.asList(
+List<Row> data = Arrays.asList(
RowFactory.create(Vectors.dense(0.0, 1.0, -2.0, 3.0)),
RowFactory.create(Vectors.dense(-1.0, 2.0, 4.0, -7.0)),
RowFactory.create(Vectors.dense(14.0, -2.0, -5.0, 1.0))
-));
+);
StructType schema = new StructType(new StructField[]{
new StructField("features", new VectorUDT(), false, Metadata.empty()),
});
-Dataset<Row> df = jsql.createDataFrame(data, schema);
+Dataset<Row> df = spark.createDataFrame(data, schema);
DCT dct = new DCT()
.setInputCol("features")
.setOutputCol("featuresDCT")
.setInverse(false);
Dataset<Row> dctDf = dct.transform(df);
dctDf.select("featuresDCT").show(3);
// $example off$
-jsc.stop();
+spark.stop();
}
}
