Update Java Example.

yhuai · yhuai · commit 83013fb9e7f3 · 2014-06-16T12:14:42.000-07:00
diff --git a/docs/sql-programming-guide.md b/docs/sql-programming-guide.md
@@ -405,23 +405,23 @@ JavaSQLContext sqlCtx = new org.apache.spark.sql.api.java.JavaSQLContext(sc);
 
 // A JSON dataset is pointed by path.
 // The path can be either a single text file or a directory storing text files.
-String path = "examples/src/main/resources/people.json"
+String path = "examples/src/main/resources/people.json";
 // Create a JavaSchemaRDD from the file(s) pointed by path
-JavaSchemaRDD people = sqlCtx.jsonFile(path)
+JavaSchemaRDD people = sqlCtx.jsonFile(path);
 
 // Because the schema of a JSON dataset is automatically inferred, to write queries,
 // it is better to take a look at what is the schema.
-people.printSchema()
+people.printSchema();
 // The schema of people is ...
 // root
 //  |-- age: IntegerType
 //  |-- name: StringType
 
 // Register this JavaSchemaRDD as a table.
-people.registerAsTable("people")
+people.registerAsTable("people");
 
 // SQL statements can be run by using the sql methods provided by sqlCtx.
-JavaSchemaRDD teenagers = sqlCtx.sql("SELECT name FROM people WHERE age >= 13 AND age <= 19")
+JavaSchemaRDD teenagers = sqlCtx.sql("SELECT name FROM people WHERE age >= 13 AND age <= 19");
 
 // The results of SQL queries are JavaSchemaRDDs and support all the normal RDD operations.
 // The columns of a row in the result can be accessed by ordinal.
@@ -435,11 +435,11 @@ List<String> teenagerNames = teenagers.map(new Function<Row, String>() {
 // a RDD[String] storing one JSON object per string.
 List<String> jsonData = Arrays.asList(
   "{\"name\":\"Yin\",\"address\":{\"city\":\"Columbus\",\"state\":\"Ohio\"}}");
-JavaRDD<String> anotherPeopleRDD = sc.parallelize(jsonData)
-val anotherPeople = sqlCtx.jsonRDD(anotherPeopleRDD)
+JavaRDD<String> anotherPeopleRDD = sc.parallelize(jsonData);
+JavaSchemaRDD anotherPeople = sqlCtx.jsonRDD(anotherPeopleRDD);
 
 // Take a look at the schema of this new JavaSchemaRDD.
-anotherPeople.printSchema()
+anotherPeople.printSchema();
 // The schema of anotherPeople is ...
 // root
 //  |-- address: StructType
diff --git a/examples/src/main/java/org/apache/spark/examples/sql/JavaSparkSQL.java b/examples/src/main/java/org/apache/spark/examples/sql/JavaSparkSQL.java
@@ -18,6 +18,7 @@
 package org.apache.spark.examples.sql;
 
 import java.io.Serializable;
+import java.util.Arrays;
 import java.util.List;
 
 import org.apache.spark.SparkConf;
@@ -56,6 +57,8 @@ public static void main(String[] args) throws Exception {
     JavaSparkContext ctx = new JavaSparkContext(sparkConf);
     JavaSQLContext sqlCtx = new JavaSQLContext(ctx);
 
+
+    System.out.println("=== Data source: RDD ===");
     // Load a text file and convert each line to a Java Bean.
     JavaRDD<Person> people = ctx.textFile("examples/src/main/resources/people.txt").map(
       new Function<String, Person>() {
@@ -84,16 +87,88 @@ public String call(Row row) {
         return "Name: " + row.getString(0);
       }
     }).collect();
+    for (String name: teenagerNames) {
+      System.out.println(name);
+    }
 
+    System.out.println("=== Data source: Parquet File ===");
     // JavaSchemaRDDs can be saved as parquet files, maintaining the schema information.
     schemaPeople.saveAsParquetFile("people.parquet");
 
-    // Read in the parquet file created above.  Parquet files are self-describing so the schema is preserved.
+    // Read in the parquet file created above.
+    // Parquet files are self-describing so the schema is preserved.
     // The result of loading a parquet file is also a JavaSchemaRDD.
     JavaSchemaRDD parquetFile = sqlCtx.parquetFile("people.parquet");
 
     //Parquet files can also be registered as tables and then used in SQL statements.
     parquetFile.registerAsTable("parquetFile");
-    JavaSchemaRDD teenagers2 = sqlCtx.sql("SELECT name FROM parquetFile WHERE age >= 13 AND age <= 19");
+    JavaSchemaRDD teenagers2 =
+      sqlCtx.sql("SELECT name FROM parquetFile WHERE age >= 13 AND age <= 19");
+    teenagerNames = teenagers2.map(new Function<Row, String>() {
+      public String call(Row row) {
+          return "Name: " + row.getString(0);
+      }
+    }).collect();
+    for (String name: teenagerNames) {
+      System.out.println(name);
+    }
+
+    System.out.println("=== Data source: JSON Dataset ===");
+    // A JSON dataset is pointed by path.
+    // The path can be either a single text file or a directory storing text files.
+    String path = "examples/src/main/resources/people.json";
+    // Create a JavaSchemaRDD from the file(s) pointed by path
+    JavaSchemaRDD peopleFromJsonFile = sqlCtx.jsonFile(path);
+
+    // Because the schema of a JSON dataset is automatically inferred, to write queries,
+    // it is better to take a look at what is the schema.
+    peopleFromJsonFile.printSchema();
+    // The schema of people is ...
+    // root
+    //  |-- age: IntegerType
+    //  |-- name: StringType
+
+    // Register this JavaSchemaRDD as a table.
+    peopleFromJsonFile.registerAsTable("people");
+
+    // SQL statements can be run by using the sql methods provided by sqlCtx.
+    JavaSchemaRDD teenagers3 = sqlCtx.sql("SELECT name FROM people WHERE age >= 13 AND age <= 19");
+
+    // The results of SQL queries are JavaSchemaRDDs and support all the normal RDD operations.
+    // The columns of a row in the result can be accessed by ordinal.
+    teenagerNames = teenagers3.map(new Function<Row, String>() {
+      public String call(Row row) { return "Name: " + row.getString(0); }
+    }).collect();
+    for (String name: teenagerNames) {
+      System.out.println(name);
+    }
+
+    // Alternatively, a JavaSchemaRDD can be created for a JSON dataset represented by
+    // a RDD[String] storing one JSON object per string.
+    List<String> jsonData = Arrays.asList(
+          "{\"name\":\"Yin\",\"address\":{\"city\":\"Columbus\",\"state\":\"Ohio\"}}");
+    JavaRDD<String> anotherPeopleRDD = ctx.parallelize(jsonData);
+    JavaSchemaRDD peopleFromJsonRDD = sqlCtx.jsonRDD(anotherPeopleRDD);
+
+    // Take a look at the schema of this new JavaSchemaRDD.
+    peopleFromJsonRDD.printSchema();
+    // The schema of anotherPeople is ...
+    // root
+    //  |-- address: StructType
+    //  |    |-- city: StringType
+    //  |    |-- state: StringType
+    //  |-- name: StringType
+
+    peopleFromJsonRDD.registerAsTable("people2");
+
+    JavaSchemaRDD peopleWithCity = sqlCtx.sql("SELECT name, address.city FROM people2");
+    List<String> nameAndCity = peopleWithCity.map(new Function<Row, String>() {
+      public String call(Row row) {
+        return "Name: " + row.getString(0) + ", City: " + row.getString(1);
+      }
+    }).collect();
+    for (String name: nameAndCity) {
+      System.out.println(name);
+    }
   }
 }