-
Notifications
You must be signed in to change notification settings - Fork 29k
[SPARK-16421][EXAMPLES][ML] Improve ML Example Outputs #14308
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
7b4496b
6e4ed29
26718e9
ff066ce
53a2941
38c3199
a8093be
afe2b2a
b7384ce
ae2249a
d2d0671
bb2fcee
7ab936b
a59cc8d
479819d
a556742
6a8d36c
b634f9b
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
This file was deleted.
This file was deleted.
This file was deleted.
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -18,10 +18,20 @@ | |
| package org.apache.spark.examples.ml; | ||
|
|
||
| // $example on$ | ||
| import java.util.Arrays; | ||
| import java.util.List; | ||
|
|
||
| import org.apache.spark.ml.feature.MaxAbsScaler; | ||
| import org.apache.spark.ml.feature.MaxAbsScalerModel; | ||
| import org.apache.spark.ml.linalg.Vectors; | ||
| import org.apache.spark.ml.linalg.VectorUDT; | ||
| import org.apache.spark.sql.Dataset; | ||
| import org.apache.spark.sql.Row; | ||
| import org.apache.spark.sql.RowFactory; | ||
| import org.apache.spark.sql.types.DataTypes; | ||
| import org.apache.spark.sql.types.Metadata; | ||
| import org.apache.spark.sql.types.StructField; | ||
| import org.apache.spark.sql.types.StructType; | ||
| // $example off$ | ||
| import org.apache.spark.sql.SparkSession; | ||
|
|
||
|
|
@@ -34,10 +44,17 @@ public static void main(String[] args) { | |
| .getOrCreate(); | ||
|
|
||
| // $example on$ | ||
| Dataset<Row> dataFrame = spark | ||
| .read() | ||
| .format("libsvm") | ||
| .load("data/mllib/sample_libsvm_data.txt"); | ||
| List<Row> data = Arrays.asList( | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Does the data change here? Why change from reading the file?
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. The data in the file is fine, but it uses sparse vectors, so when the result is output, it doesn't really show anything. Using just a small sample dataset, you can see what it is doing from the output before and after. |
||
| RowFactory.create(0, Vectors.dense(1.0, 0.1, -8.0)), | ||
| RowFactory.create(1, Vectors.dense(2.0, 1.0, -4.0)), | ||
| RowFactory.create(2, Vectors.dense(4.0, 10.0, 8.0)) | ||
| ); | ||
| StructType schema = new StructType(new StructField[]{ | ||
| new StructField("id", DataTypes.IntegerType, false, Metadata.empty()), | ||
| new StructField("features", new VectorUDT(), false, Metadata.empty()) | ||
| }); | ||
| Dataset<Row> dataFrame = spark.createDataFrame(data, schema); | ||
|
|
||
| MaxAbsScaler scaler = new MaxAbsScaler() | ||
| .setInputCol("features") | ||
| .setOutputCol("scaledFeatures"); | ||
|
|
@@ -47,8 +64,9 @@ public static void main(String[] args) { | |
|
|
||
| // rescale each feature to range [-1, 1]. | ||
| Dataset<Row> scaledData = scalerModel.transform(dataFrame); | ||
| scaledData.show(); | ||
| scaledData.select("features", "scaledFeatures").show(); | ||
| // $example off$ | ||
|
|
||
| spark.stop(); | ||
| } | ||
|
|
||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
No big deal, but why the extra line break?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The 2 arrays that are printed are large and all the output gets clumped together, looking like a huge block of text, so adding some separation makes it a bit more readable.