
Commit 9e266d0

zhengruifeng authored and Andrew Or committed
[SPARK-15031][SPARK-15134][EXAMPLE][DOC] Use SparkSession and update indent in examples
## What changes were proposed in this pull request?

1. Use `SparkSession` according to [SPARK-15031](https://issues.apache.org/jira/browse/SPARK-15031).
2. Update indentation for `SparkContext` according to [SPARK-15134](https://issues.apache.org/jira/browse/SPARK-15134).
3. Also remove some duplicated spaces and add missing periods in comments.

## How was this patch tested?

Manual tests.

Author: Zheng RuiFeng <ruifengz@foxmail.com>

Closes #13050 from zhengruifeng/use_sparksession.
1 parent ba5487c commit 9e266d0

34 files changed (+192, −151 lines)
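For context, the change moves each example from a single chained `SparkSession` builder call to one builder method per line. Below is a minimal standalone sketch of that style, assuming the same LIBSVM reader used by the examples in this diff; the class name, app name, and data path are placeholders rather than code from any file changed here.

```java
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SparkSession;

public class SparkSessionStyleExample {
  public static void main(String[] args) {
    // One builder method per line, as this commit standardizes (SPARK-15134).
    SparkSession spark = SparkSession
      .builder()
      .appName("SparkSessionStyleExample")  // placeholder app name
      .getOrCreate();

    // Load a LIBSVM file as a DataFrame, mirroring the examples below.
    Dataset<Row> data = spark.read().format("libsvm")
      .load("data/mllib/sample_libsvm_data.txt");  // placeholder path
    data.show(5);

    spark.stop();
  }
}
```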

examples/src/main/java/org/apache/spark/examples/ml/JavaDecisionTreeClassificationExample.java

Lines changed: 8 additions & 6 deletions
@@ -32,7 +32,9 @@
 public class JavaDecisionTreeClassificationExample {
   public static void main(String[] args) {
     SparkSession spark = SparkSession
-      .builder().appName("JavaDecisionTreeClassificationExample").getOrCreate();
+      .builder()
+      .appName("JavaDecisionTreeClassificationExample")
+      .getOrCreate();

     // $example on$
     // Load the data stored in LIBSVM format as a DataFrame.
@@ -52,10 +54,10 @@ public static void main(String[] args) {
     VectorIndexerModel featureIndexer = new VectorIndexer()
       .setInputCol("features")
       .setOutputCol("indexedFeatures")
-      .setMaxCategories(4) // features with > 4 distinct values are treated as continuous
+      .setMaxCategories(4) // features with > 4 distinct values are treated as continuous.
       .fit(data);

-    // Split the data into training and test sets (30% held out for testing)
+    // Split the data into training and test sets (30% held out for testing).
     Dataset<Row>[] splits = data.randomSplit(new double[]{0.7, 0.3});
     Dataset<Row> trainingData = splits[0];
     Dataset<Row> testData = splits[1];
@@ -71,11 +73,11 @@ public static void main(String[] args) {
       .setOutputCol("predictedLabel")
       .setLabels(labelIndexer.labels());

-    // Chain indexers and tree in a Pipeline
+    // Chain indexers and tree in a Pipeline.
     Pipeline pipeline = new Pipeline()
       .setStages(new PipelineStage[]{labelIndexer, featureIndexer, dt, labelConverter});

-    // Train model.  This also runs the indexers.
+    // Train model. This also runs the indexers.
     PipelineModel model = pipeline.fit(trainingData);

     // Make predictions.
@@ -84,7 +86,7 @@ public static void main(String[] args) {
     // Select example rows to display.
     predictions.select("predictedLabel", "label", "features").show(5);

-    // Select (prediction, true label) and compute test error
+    // Select (prediction, true label) and compute test error.
     MulticlassClassificationEvaluator evaluator = new MulticlassClassificationEvaluator()
       .setLabelCol("indexedLabel")
       .setPredictionCol("prediction")

examples/src/main/java/org/apache/spark/examples/ml/JavaDecisionTreeRegressionExample.java

Lines changed: 7 additions & 5 deletions
@@ -33,7 +33,9 @@
 public class JavaDecisionTreeRegressionExample {
   public static void main(String[] args) {
     SparkSession spark = SparkSession
-      .builder().appName("JavaDecisionTreeRegressionExample").getOrCreate();
+      .builder()
+      .appName("JavaDecisionTreeRegressionExample")
+      .getOrCreate();
     // $example on$
     // Load the data stored in LIBSVM format as a DataFrame.
     Dataset<Row> data = spark.read().format("libsvm")
@@ -47,7 +49,7 @@ public static void main(String[] args) {
       .setMaxCategories(4)
       .fit(data);

-    // Split the data into training and test sets (30% held out for testing)
+    // Split the data into training and test sets (30% held out for testing).
     Dataset<Row>[] splits = data.randomSplit(new double[]{0.7, 0.3});
     Dataset<Row> trainingData = splits[0];
     Dataset<Row> testData = splits[1];
@@ -56,11 +58,11 @@ public static void main(String[] args) {
     DecisionTreeRegressor dt = new DecisionTreeRegressor()
       .setFeaturesCol("indexedFeatures");

-    // Chain indexer and tree in a Pipeline
+    // Chain indexer and tree in a Pipeline.
     Pipeline pipeline = new Pipeline()
       .setStages(new PipelineStage[]{featureIndexer, dt});

-    // Train model.  This also runs the indexer.
+    // Train model. This also runs the indexer.
     PipelineModel model = pipeline.fit(trainingData);

     // Make predictions.
@@ -69,7 +71,7 @@ public static void main(String[] args) {
     // Select example rows to display.
     predictions.select("label", "features").show(5);

-    // Select (prediction, true label) and compute test error
+    // Select (prediction, true label) and compute test error.
     RegressionEvaluator evaluator = new RegressionEvaluator()
       .setLabelCol("label")
       .setPredictionCol("prediction")

examples/src/main/java/org/apache/spark/examples/ml/JavaDeveloperApiExample.java

Lines changed: 3 additions & 3 deletions
@@ -62,15 +62,15 @@ public static void main(String[] args) throws Exception {
       new LabeledPoint(1.0, Vectors.dense(0.0, 1.2, -0.5)));
     Dataset<Row> training = spark.createDataFrame(localTraining, LabeledPoint.class);

-    // Create a LogisticRegression instance.  This instance is an Estimator.
+    // Create a LogisticRegression instance. This instance is an Estimator.
     MyJavaLogisticRegression lr = new MyJavaLogisticRegression();
     // Print out the parameters, documentation, and any default values.
     System.out.println("MyJavaLogisticRegression parameters:\n" + lr.explainParams() + "\n");

     // We may set parameters using setter methods.
     lr.setMaxIter(10);

-    // Learn a LogisticRegression model.  This uses the parameters stored in lr.
+    // Learn a LogisticRegression model. This uses the parameters stored in lr.
     MyJavaLogisticRegressionModel model = lr.fit(training);

     // Prepare test data.
@@ -214,7 +214,7 @@ public Vector predictRaw(Vector features) {
   }

   /**
-   * Number of classes the label can take.  2 indicates binary classification.
+   * Number of classes the label can take. 2 indicates binary classification.
    */
   public int numClasses() { return 2; }

examples/src/main/java/org/apache/spark/examples/ml/JavaEstimatorTransformerParamExample.java

Lines changed: 3 additions & 1 deletion
@@ -38,7 +38,9 @@
 public class JavaEstimatorTransformerParamExample {
   public static void main(String[] args) {
     SparkSession spark = SparkSession
-      .builder().appName("JavaEstimatorTransformerParamExample").getOrCreate();
+      .builder()
+      .appName("JavaEstimatorTransformerParamExample")
+      .getOrCreate();

     // $example on$
     // Prepare training data.

examples/src/main/java/org/apache/spark/examples/ml/JavaGradientBoostedTreeClassifierExample.java

Lines changed: 3 additions & 3 deletions
@@ -75,11 +75,11 @@ public static void main(String[] args) {
       .setOutputCol("predictedLabel")
       .setLabels(labelIndexer.labels());

-    // Chain indexers and GBT in a Pipeline
+    // Chain indexers and GBT in a Pipeline.
     Pipeline pipeline = new Pipeline()
       .setStages(new PipelineStage[] {labelIndexer, featureIndexer, gbt, labelConverter});

-    // Train model.  This also runs the indexers.
+    // Train model. This also runs the indexers.
     PipelineModel model = pipeline.fit(trainingData);

     // Make predictions.
@@ -88,7 +88,7 @@ public static void main(String[] args) {
     // Select example rows to display.
     predictions.select("predictedLabel", "label", "features").show(5);

-    // Select (prediction, true label) and compute test error
+    // Select (prediction, true label) and compute test error.
     MulticlassClassificationEvaluator evaluator = new MulticlassClassificationEvaluator()
       .setLabelCol("indexedLabel")
       .setPredictionCol("prediction")

examples/src/main/java/org/apache/spark/examples/ml/JavaGradientBoostedTreeRegressorExample.java

Lines changed: 7 additions & 5 deletions
@@ -34,7 +34,9 @@
 public class JavaGradientBoostedTreeRegressorExample {
   public static void main(String[] args) {
     SparkSession spark = SparkSession
-      .builder().appName("JavaGradientBoostedTreeRegressorExample").getOrCreate();
+      .builder()
+      .appName("JavaGradientBoostedTreeRegressorExample")
+      .getOrCreate();

     // $example on$
     // Load and parse the data file, converting it to a DataFrame.
@@ -48,7 +50,7 @@ public static void main(String[] args) {
       .setMaxCategories(4)
       .fit(data);

-    // Split the data into training and test sets (30% held out for testing)
+    // Split the data into training and test sets (30% held out for testing).
     Dataset<Row>[] splits = data.randomSplit(new double[] {0.7, 0.3});
     Dataset<Row> trainingData = splits[0];
     Dataset<Row> testData = splits[1];
@@ -59,10 +61,10 @@ public static void main(String[] args) {
       .setFeaturesCol("indexedFeatures")
       .setMaxIter(10);

-    // Chain indexer and GBT in a Pipeline
+    // Chain indexer and GBT in a Pipeline.
     Pipeline pipeline = new Pipeline().setStages(new PipelineStage[] {featureIndexer, gbt});

-    // Train model.  This also runs the indexer.
+    // Train model. This also runs the indexer.
     PipelineModel model = pipeline.fit(trainingData);

     // Make predictions.
@@ -71,7 +73,7 @@ public static void main(String[] args) {
     // Select example rows to display.
     predictions.select("prediction", "label", "features").show(5);

-    // Select (prediction, true label) and compute test error
+    // Select (prediction, true label) and compute test error.
     RegressionEvaluator evaluator = new RegressionEvaluator()
       .setLabelCol("label")
       .setPredictionCol("prediction")

examples/src/main/java/org/apache/spark/examples/ml/JavaLinearRegressionWithElasticNetExample.java

Lines changed: 7 additions & 5 deletions
@@ -30,10 +30,12 @@
 public class JavaLinearRegressionWithElasticNetExample {
   public static void main(String[] args) {
     SparkSession spark = SparkSession
-      .builder().appName("JavaLinearRegressionWithElasticNetExample").getOrCreate();
+      .builder()
+      .appName("JavaLinearRegressionWithElasticNetExample")
+      .getOrCreate();

     // $example on$
-    // Load training data
+    // Load training data.
     Dataset<Row> training = spark.read().format("libsvm")
       .load("data/mllib/sample_linear_regression_data.txt");

@@ -42,14 +44,14 @@ public static void main(String[] args) {
       .setRegParam(0.3)
       .setElasticNetParam(0.8);

-    // Fit the model
+    // Fit the model.
     LinearRegressionModel lrModel = lr.fit(training);

-    // Print the coefficients and intercept for linear regression
+    // Print the coefficients and intercept for linear regression.
     System.out.println("Coefficients: "
       + lrModel.coefficients() + " Intercept: " + lrModel.intercept());

-    // Summarize the model over the training set and print out some metrics
+    // Summarize the model over the training set and print out some metrics.
     LinearRegressionTrainingSummary trainingSummary = lrModel.summary();
     System.out.println("numIterations: " + trainingSummary.totalIterations());
     System.out.println("objectiveHistory: " + Vectors.dense(trainingSummary.objectiveHistory()));

examples/src/main/java/org/apache/spark/examples/ml/JavaLogisticRegressionSummaryExample.java

Lines changed: 3 additions & 1 deletion
@@ -31,7 +31,9 @@
 public class JavaLogisticRegressionSummaryExample {
   public static void main(String[] args) {
     SparkSession spark = SparkSession
-      .builder().appName("JavaLogisticRegressionSummaryExample").getOrCreate();
+      .builder()
+      .appName("JavaLogisticRegressionSummaryExample")
+      .getOrCreate();

     // Load training data
     Dataset<Row> training = spark.read().format("libsvm")

examples/src/main/java/org/apache/spark/examples/ml/JavaLogisticRegressionWithElasticNetExample.java

Lines changed: 3 additions & 1 deletion
@@ -28,7 +28,9 @@
 public class JavaLogisticRegressionWithElasticNetExample {
   public static void main(String[] args) {
     SparkSession spark = SparkSession
-      .builder().appName("JavaLogisticRegressionWithElasticNetExample").getOrCreate();
+      .builder()
+      .appName("JavaLogisticRegressionWithElasticNetExample")
+      .getOrCreate();

     // $example on$
     // Load training data

examples/src/main/java/org/apache/spark/examples/ml/JavaModelSelectionViaCrossValidationExample.java

Lines changed: 3 additions & 1 deletion
@@ -43,7 +43,9 @@
 public class JavaModelSelectionViaCrossValidationExample {
   public static void main(String[] args) {
     SparkSession spark = SparkSession
-      .builder().appName("JavaModelSelectionViaCrossValidationExample").getOrCreate();
+      .builder()
+      .appName("JavaModelSelectionViaCrossValidationExample")
+      .getOrCreate();

     // $example on$
     // Prepare training documents, which are labeled.
