Skip to content

Commit 83013fb

Browse files
committed
Update Java Example.
1 parent e7a6c19 commit 83013fb

File tree

2 files changed

+85
-10
lines changed

2 files changed

+85
-10
lines changed

docs/sql-programming-guide.md

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -405,23 +405,23 @@ JavaSQLContext sqlCtx = new org.apache.spark.sql.api.java.JavaSQLContext(sc);
405405

406406
// A JSON dataset is pointed by path.
407407
// The path can be either a single text file or a directory storing text files.
408-
String path = "examples/src/main/resources/people.json"
408+
String path = "examples/src/main/resources/people.json";
409409
// Create a JavaSchemaRDD from the file(s) pointed by path
410-
JavaSchemaRDD people = sqlCtx.jsonFile(path)
410+
JavaSchemaRDD people = sqlCtx.jsonFile(path);
411411

412412
// Because the schema of a JSON dataset is automatically inferred, to write queries,
413413
// it is better to take a look at what is the schema.
414-
people.printSchema()
414+
people.printSchema();
415415
// The schema of people is ...
416416
// root
417417
// |-- age: IntegerType
418418
// |-- name: StringType
419419

420420
// Register this JavaSchemaRDD as a table.
421-
people.registerAsTable("people")
421+
people.registerAsTable("people");
422422

423423
// SQL statements can be run by using the sql methods provided by sqlCtx.
424-
JavaSchemaRDD teenagers = sqlCtx.sql("SELECT name FROM people WHERE age >= 13 AND age <= 19")
424+
JavaSchemaRDD teenagers = sqlCtx.sql("SELECT name FROM people WHERE age >= 13 AND age <= 19");
425425

426426
// The results of SQL queries are JavaSchemaRDDs and support all the normal RDD operations.
427427
// The columns of a row in the result can be accessed by ordinal.
@@ -435,11 +435,11 @@ List<String> teenagerNames = teenagers.map(new Function<Row, String>() {
435435
// a RDD[String] storing one JSON object per string.
436436
List<String> jsonData = Arrays.asList(
437437
"{\"name\":\"Yin\",\"address\":{\"city\":\"Columbus\",\"state\":\"Ohio\"}}");
438-
JavaRDD<String> anotherPeopleRDD = sc.parallelize(jsonData)
439-
val anotherPeople = sqlCtx.jsonRDD(anotherPeopleRDD)
438+
JavaRDD<String> anotherPeopleRDD = sc.parallelize(jsonData);
439+
JavaSchemaRDD anotherPeople = sqlCtx.jsonRDD(anotherPeopleRDD);
440440

441441
// Take a look at the schema of this new JavaSchemaRDD.
442-
anotherPeople.printSchema()
442+
anotherPeople.printSchema();
443443
// The schema of anotherPeople is ...
444444
// root
445445
// |-- address: StructType

examples/src/main/java/org/apache/spark/examples/sql/JavaSparkSQL.java

Lines changed: 77 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
package org.apache.spark.examples.sql;
1919

2020
import java.io.Serializable;
21+
import java.util.Arrays;
2122
import java.util.List;
2223

2324
import org.apache.spark.SparkConf;
@@ -56,6 +57,8 @@ public static void main(String[] args) throws Exception {
5657
JavaSparkContext ctx = new JavaSparkContext(sparkConf);
5758
JavaSQLContext sqlCtx = new JavaSQLContext(ctx);
5859

60+
61+
System.out.println("=== Data source: RDD ===");
5962
// Load a text file and convert each line to a Java Bean.
6063
JavaRDD<Person> people = ctx.textFile("examples/src/main/resources/people.txt").map(
6164
new Function<String, Person>() {
@@ -84,16 +87,88 @@ public String call(Row row) {
8487
return "Name: " + row.getString(0);
8588
}
8689
}).collect();
90+
for (String name: teenagerNames) {
91+
System.out.println(name);
92+
}
8793

94+
System.out.println("=== Data source: Parquet File ===");
8895
// JavaSchemaRDDs can be saved as parquet files, maintaining the schema information.
8996
schemaPeople.saveAsParquetFile("people.parquet");
9097

91-
// Read in the parquet file created above. Parquet files are self-describing so the schema is preserved.
98+
// Read in the parquet file created above.
99+
// Parquet files are self-describing so the schema is preserved.
92100
// The result of loading a parquet file is also a JavaSchemaRDD.
93101
JavaSchemaRDD parquetFile = sqlCtx.parquetFile("people.parquet");
94102

95103
//Parquet files can also be registered as tables and then used in SQL statements.
96104
parquetFile.registerAsTable("parquetFile");
97-
JavaSchemaRDD teenagers2 = sqlCtx.sql("SELECT name FROM parquetFile WHERE age >= 13 AND age <= 19");
105+
JavaSchemaRDD teenagers2 =
106+
sqlCtx.sql("SELECT name FROM parquetFile WHERE age >= 13 AND age <= 19");
107+
teenagerNames = teenagers2.map(new Function<Row, String>() {
108+
public String call(Row row) {
109+
return "Name: " + row.getString(0);
110+
}
111+
}).collect();
112+
for (String name: teenagerNames) {
113+
System.out.println(name);
114+
}
115+
116+
System.out.println("=== Data source: JSON Dataset ===");
117+
// A JSON dataset is pointed by path.
118+
// The path can be either a single text file or a directory storing text files.
119+
String path = "examples/src/main/resources/people.json";
120+
// Create a JavaSchemaRDD from the file(s) pointed by path
121+
JavaSchemaRDD peopleFromJsonFile = sqlCtx.jsonFile(path);
122+
123+
// Because the schema of a JSON dataset is automatically inferred, to write queries,
124+
// it is better to take a look at what is the schema.
125+
peopleFromJsonFile.printSchema();
126+
// The schema of people is ...
127+
// root
128+
// |-- age: IntegerType
129+
// |-- name: StringType
130+
131+
// Register this JavaSchemaRDD as a table.
132+
peopleFromJsonFile.registerAsTable("people");
133+
134+
// SQL statements can be run by using the sql methods provided by sqlCtx.
135+
JavaSchemaRDD teenagers3 = sqlCtx.sql("SELECT name FROM people WHERE age >= 13 AND age <= 19");
136+
137+
// The results of SQL queries are JavaSchemaRDDs and support all the normal RDD operations.
138+
// The columns of a row in the result can be accessed by ordinal.
139+
teenagerNames = teenagers3.map(new Function<Row, String>() {
140+
public String call(Row row) { return "Name: " + row.getString(0); }
141+
}).collect();
142+
for (String name: teenagerNames) {
143+
System.out.println(name);
144+
}
145+
146+
// Alternatively, a JavaSchemaRDD can be created for a JSON dataset represented by
147+
// a RDD[String] storing one JSON object per string.
148+
List<String> jsonData = Arrays.asList(
149+
"{\"name\":\"Yin\",\"address\":{\"city\":\"Columbus\",\"state\":\"Ohio\"}}");
150+
JavaRDD<String> anotherPeopleRDD = ctx.parallelize(jsonData);
151+
JavaSchemaRDD peopleFromJsonRDD = sqlCtx.jsonRDD(anotherPeopleRDD);
152+
153+
// Take a look at the schema of this new JavaSchemaRDD.
154+
peopleFromJsonRDD.printSchema();
155+
// The schema of anotherPeople is ...
156+
// root
157+
// |-- address: StructType
158+
// | |-- city: StringType
159+
// | |-- state: StringType
160+
// |-- name: StringType
161+
162+
peopleFromJsonRDD.registerAsTable("people2");
163+
164+
JavaSchemaRDD peopleWithCity = sqlCtx.sql("SELECT name, address.city FROM people2");
165+
List<String> nameAndCity = peopleWithCity.map(new Function<Row, String>() {
166+
public String call(Row row) {
167+
return "Name: " + row.getString(0) + ", City: " + row.getString(1);
168+
}
169+
}).collect();
170+
for (String name: nameAndCity) {
171+
System.out.println(name);
172+
}
98173
}
99174
}

0 commit comments

Comments
 (0)