From e7def7c491c8fb06a73aea2f2e072dbe0e59c1da Mon Sep 17 00:00:00 2001 From: Xin Ren Date: Wed, 6 Jul 2016 16:45:21 -0700 Subject: [PATCH 01/11] [SPARK-16381] move example code to a separate R file --- docs/sql-programming-guide.md | 155 ++-------------------- examples/src/main/r/RSparkSQLExample.R | 175 +++++++++++++++++++++++++ 2 files changed, 188 insertions(+), 142 deletions(-) create mode 100644 examples/src/main/r/RSparkSQLExample.R diff --git a/docs/sql-programming-guide.md b/docs/sql-programming-guide.md index 68419e133159..0fb04937071e 100644 --- a/docs/sql-programming-guide.md +++ b/docs/sql-programming-guide.md @@ -86,9 +86,7 @@ The entry point into all functionality in Spark is the [`SparkSession`](api/pyth The entry point into all functionality in Spark is the [`SparkSession`](api/R/sparkR.session.html) class. To initialize a basic `SparkSession`, just call `sparkR.session()`: -{% highlight r %} -sparkR.session() -{% endhighlight %} +{% include_example init_session r/RSparkSQLExample.R %} Note that when invoked for the first time, `sparkR.session()` initializes a global `SparkSession` singleton instance, and always returns a reference to this instance for successive invocations. In this way, users only need to initialize the `SparkSession` once, then SparkR functions like `read.df` will be able to access this global instance implicitly, and users don't need to pass the `SparkSession` instance around. @@ -155,12 +153,7 @@ from a Hive table, or from [Spark data sources](#data-sources). As an example, the following creates a DataFrame based on the content of a JSON file: -{% highlight r %} -df <- read.json("examples/src/main/resources/people.json") - -# Displays the content of the DataFrame -showDF(df) -{% endhighlight %} +{% include_example create_DataFrames r/RSparkSQLExample.R %} @@ -343,50 +336,8 @@ In addition to simple column references and expressions, DataFrames also have a
-{% highlight r %} -# Create the DataFrame -df <- read.json("examples/src/main/resources/people.json") - -# Show the content of the DataFrame -showDF(df) -## age name -## null Michael -## 30 Andy -## 19 Justin - -# Print the schema in a tree format -printSchema(df) -## root -## |-- age: long (nullable = true) -## |-- name: string (nullable = true) -# Select only the "name" column -showDF(select(df, "name")) -## name -## Michael -## Andy -## Justin - -# Select everybody, but increment the age by 1 -showDF(select(df, df$name, df$age + 1)) -## name (age + 1) -## Michael null -## Andy 31 -## Justin 20 - -# Select people older than 21 -showDF(where(df, df$age > 21)) -## age name -## 30 Andy - -# Count people by age -showDF(count(groupBy(df, "age"))) -## age count -## null 1 -## 19 1 -## 30 1 - -{% endhighlight %} +{% include_example untyped_transformations r/RSparkSQLExample.R %} For a complete list of the types of operations that can be performed on a DataFrame refer to the [API Documentation](api/R/index.html). @@ -429,12 +380,10 @@ df = spark.sql("SELECT * FROM table")
The `sql` function enables applications to run SQL queries programmatically and returns the result as a `SparkDataFrame`. -{% highlight r %} -df <- sql("SELECT * FROM table") -{% endhighlight %} -
+{% include_example sql_query r/RSparkSQLExample.R %}
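As a note on this snippet: `sql` resolves only table names that have already been registered, so a self-contained variant of the example would first create a temporary view (as a later patch in this series also does). A minimal sketch, assuming the `people.json` sample that ships with Spark:

{% highlight r %}
df <- read.json("examples/src/main/resources/people.json")
# Register the DataFrame under a name so SQL can refer to it
createOrReplaceTempView(df, "people")
teenagers <- sql("SELECT name FROM people WHERE age >= 13 AND age <= 19")
head(teenagers)
{% endhighlight %}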
+ ## Creating Datasets @@ -888,10 +837,7 @@ df.select("name", "favorite_color").write.save("namesAndFavColors.parquet")
-{% highlight r %} -df <- read.df("examples/src/main/resources/users.parquet") -write.df(select(df, "name", "favorite_color"), "namesAndFavColors.parquet") -{% endhighlight %} +{% include_example source_parquet r/RSparkSQLExample.R %}
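A small aside for readers following along: `write.df` also accepts a save mode, which the example leaves at its default (`error`). A sketch with the same `users.parquet` input, using `overwrite` for illustration:

{% highlight r %}
df <- read.df("examples/src/main/resources/users.parquet")
# mode = "overwrite" replaces any existing output at the target path
write.df(select(df, "name", "favorite_color"), "namesAndFavColors.parquet", mode = "overwrite")
{% endhighlight %}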
@@ -937,12 +883,7 @@ df.select("name", "age").write.save("namesAndAges.parquet", format="parquet")
-{% highlight r %} - -df <- read.df("examples/src/main/resources/people.json", "json") -write.df(select(df, "name", "age"), "namesAndAges.parquet", "parquet") - -{% endhighlight %} +{% include_example source_json r/RSparkSQLExample.R %}
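The same pattern extends to the other built-in sources, with extra named arguments passed through as data source options. A sketch for CSV, where the input path is hypothetical:

{% highlight r %}
# header and inferSchema are forwarded to the csv source as options
csvDf <- read.df("path/to/people.csv", "csv", header = "true", inferSchema = "true")
write.df(select(csvDf, "name"), "names.parquet", "parquet")
{% endhighlight %}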
@@ -978,9 +919,7 @@ df = spark.sql("SELECT * FROM parquet.`examples/src/main/resources/users.parquet
-{% highlight r %} -df <- sql("SELECT * FROM parquet.`examples/src/main/resources/users.parquet`") -{% endhighlight %} +{% include_example direct_query r/RSparkSQLExample.R %}
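The `parquet.` prefix in the query names the data source, so the same syntax works for other sources as well, for example JSON:

{% highlight r %}
df <- sql("SELECT * FROM json.`examples/src/main/resources/people.json`")
head(df)
{% endhighlight %}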
@@ -1133,26 +1072,7 @@ for teenName in teenNames.collect():
-{% highlight r %} - -schemaPeople # The SparkDataFrame from the previous example. - -# SparkDataFrame can be saved as Parquet files, maintaining the schema information. -write.parquet(schemaPeople, "people.parquet") - -# Read in the Parquet file created above. Parquet files are self-describing so the schema is preserved. -# The result of loading a parquet file is also a DataFrame. -parquetFile <- read.parquet("people.parquet") - -# Parquet files can also be used to create a temporary view and then used in SQL statements. -createOrReplaceTempView(parquetFile, "parquetFile") -teenagers <- sql("SELECT name FROM parquetFile WHERE age >= 13 AND age <= 19") -schema <- structType(structField("name", "string")) -teenNames <- dapply(df, function(p) { cbind(paste("Name:", p$name)) }, schema) -for (teenName in collect(teenNames)$name) { - cat(teenName, "\n") -} -{% endhighlight %} +{% include_example load_programmatically r/RSparkSQLExample.R %}
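One possible simplification of the `dapply` portion of this example: when the result is small enough to bring back to the driver anyway, `dapplyCollect` avoids the explicit output schema. A sketch, assuming the `teenagers` SparkDataFrame from the example above:

{% highlight r %}
# Returns a local R data.frame directly; no structType is needed
teenNames <- dapplyCollect(teenagers, function(p) { cbind(paste("Name:", p$name)) })
{% endhighlight %}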
@@ -1315,27 +1235,7 @@ df3.printSchema()
-{% highlight r %} - -# Create a simple DataFrame, stored into a partition directory -write.df(df1, "data/test_table/key=1", "parquet", "overwrite") - -# Create another DataFrame in a new partition directory, -# adding a new column and dropping an existing column -write.df(df2, "data/test_table/key=2", "parquet", "overwrite") - -# Read the partitioned table -df3 <- read.df("data/test_table", "parquet", mergeSchema="true") -printSchema(df3) - -# The final schema consists of all 3 columns in the Parquet files together -# with the partitioning column appeared in the partition directory paths. -# root -# |-- single: int (nullable = true) -# |-- double: int (nullable = true) -# |-- triple: int (nullable = true) -# |-- key : int (nullable = true) -{% endhighlight %} +{% include_example schema_merging r/RSparkSQLExample.R %}
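As an aside, the per-read `mergeSchema` flag can likely be avoided by enabling merging session-wide through the SQL option `spark.sql.parquet.mergeSchema`. A sketch, assuming a fresh session so the option takes effect:

{% highlight r %}
sparkR.session(sparkConfig = list(spark.sql.parquet.mergeSchema = "true"))
df3 <- read.df("data/test_table", "parquet")
printSchema(df3)
{% endhighlight %}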
@@ -1601,25 +1501,8 @@ Note that the file that is offered as _a json file_ is not a typical JSON file. line must contain a separate, self-contained valid JSON object. As a consequence, a regular multi-line JSON file will most often fail. -{% highlight r %} -# A JSON dataset is pointed to by path. -# The path can be either a single text file or a directory storing text files. -path <- "examples/src/main/resources/people.json" -# Create a DataFrame from the file(s) pointed to by path -people <- read.json(path) +{% include_example load_json_file r/RSparkSQLExample.R %} -# The inferred schema can be visualized using the printSchema() method. -printSchema(people) -# root -# |-- age: long (nullable = true) -# |-- name: string (nullable = true) - -# Register this DataFrame as a table. -createOrReplaceTempView(people, "people") - -# SQL statements can be run by using the sql methods. -teenagers <- sql("SELECT name FROM people WHERE age >= 13 AND age <= 19") -{% endhighlight %}
@@ -1734,16 +1617,8 @@ results = spark.sql("FROM src SELECT key, value").collect() When working with Hive one must instantiate `SparkSession` with Hive support. This adds support for finding tables in the MetaStore and writing queries using HiveQL. -{% highlight r %} -# enableHiveSupport defaults to TRUE -sparkR.session(enableHiveSupport = TRUE) -sql("CREATE TABLE IF NOT EXISTS src (key INT, value STRING)") -sql("LOAD DATA LOCAL INPATH 'examples/src/main/resources/kv1.txt' INTO TABLE src") - -# Queries can be expressed in HiveQL. -results <- collect(sql("FROM src SELECT key, value")) -{% endhighlight %} +{% include_example hive_table r/RSparkSQLExample.R %}
@@ -1920,11 +1795,7 @@ df = spark.read.format('jdbc').options(url='jdbc:postgresql:dbserver', dbtable='
-{% highlight r %} - -df <- read.jdbc("jdbc:postgresql:dbserver", "schema.tablename", user = "username", password = "password") - -{% endhighlight %} +{% include_example jdbc r/RSparkSQLExample.R %}
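Two related calls worth knowing about here: reads can be partitioned across executors by a numeric column, and results can be written back with `write.jdbc`. A sketch with hypothetical connection details and column names:

{% highlight r %}
# Partitioned read: rows are split into 10 tasks by ranges of the "id" column
df <- read.jdbc("jdbc:postgresql:dbserver", "schema.tablename",
                partitionColumn = "id", lowerBound = 0, upperBound = 10000,
                numPartitions = 10, user = "username", password = "password")

# Write the result back out to a (hypothetical) target table
write.jdbc(df, "jdbc:postgresql:dbserver", "schema.tablename_copy", mode = "append",
           user = "username", password = "password")
{% endhighlight %}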
diff --git a/examples/src/main/r/RSparkSQLExample.R b/examples/src/main/r/RSparkSQLExample.R new file mode 100644 index 000000000000..e8260f001b8f --- /dev/null +++ b/examples/src/main/r/RSparkSQLExample.R @@ -0,0 +1,175 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +# $example on:init_session$ +sparkR.session() +# $example off:init_session$ + + +# $example on:create_DataFrames$ +df <- read.json("examples/src/main/resources/people.json") + +# Displays the content of the DataFrame +showDF(df) +# $example off:create_DataFrames$ + + +# $example on:untyped_transformations$ +# Create the DataFrame +df <- read.json("examples/src/main/resources/people.json") + +# Show the content of the DataFrame +showDF(df) +## age name +## null Michael +## 30 Andy +## 19 Justin + +# Print the schema in a tree format +printSchema(df) +## root +## |-- age: long (nullable = true) +## |-- name: string (nullable = true) + +# Select only the "name" column +showDF(select(df, "name")) +## name +## Michael +## Andy +## Justin + +# Select everybody, but increment the age by 1 +showDF(select(df, df$name, df$age + 1)) +## name (age + 1) +## Michael null +## Andy 31 +## Justin 20 + +# Select people older than 21 +showDF(where(df, df$age > 21)) +## age name +## 30 Andy + +# Count people by age +showDF(count(groupBy(df, "age"))) +## age count +## null 1 +## 19 1 +## 30 1 +# $example off:untyped_transformations$ + + +# $example on:sql_query$ +df <- sql("SELECT * FROM table") +# $example off:sql_query$ + + +# $example on:source_parquet$ +df <- read.df("examples/src/main/resources/users.parquet") +write.df(select(df, "name", "favorite_color"), "namesAndFavColors.parquet") +# $example off:source_parquet$ + + +# $example on:source_json$ +df <- read.df("examples/src/main/resources/people.json", "json") +write.df(select(df, "name", "age"), "namesAndAges.parquet", "parquet") +# $example off:source_json$ + + +# $example on:direct_query$ +df <- sql("SELECT * FROM parquet.`examples/src/main/resources/users.parquet`") +# $example off:direct_query$ + + +# $example on:load_programmatically$ +schemaPeople # The SparkDataFrame from the previous example. + +# SparkDataFrame can be saved as Parquet files, maintaining the schema information. +write.parquet(schemaPeople, "people.parquet") + +# Read in the Parquet file created above. Parquet files are self-describing so the schema is preserved. +# The result of loading a parquet file is also a DataFrame. +parquetFile <- read.parquet("people.parquet") + +# Parquet files can also be used to create a temporary view and then used in SQL statements. 
+createOrReplaceTempView(parquetFile, "parquetFile") +teenagers <- sql("SELECT name FROM parquetFile WHERE age >= 13 AND age <= 19") +schema <- structType(structField("name", "string")) +teenNames <- dapply(df, function(p) { cbind(paste("Name:", p$name)) }, schema) +for (teenName in collect(teenNames)$name) { + cat(teenName, "\n") +} +# $example off:load_programmatically$ + + +# $example on:schema_merging$ +# Create a simple DataFrame, stored into a partition directory +write.df(df1, "data/test_table/key=1", "parquet", "overwrite") + +# Create another DataFrame in a new partition directory, +# adding a new column and dropping an existing column +write.df(df2, "data/test_table/key=2", "parquet", "overwrite") + +# Read the partitioned table +df3 <- read.df("data/test_table", "parquet", mergeSchema="true") +printSchema(df3) + +# The final schema consists of all 3 columns in the Parquet files together +# with the partitioning column appeared in the partition directory paths. +# root +# |-- single: int (nullable = true) +# |-- double: int (nullable = true) +# |-- triple: int (nullable = true) +# |-- key : int (nullable = true) +# $example off:schema_merging$ + + +# $example on:load_json_file$ +# A JSON dataset is pointed to by path. +# The path can be either a single text file or a directory storing text files. +path <- "examples/src/main/resources/people.json" +# Create a DataFrame from the file(s) pointed to by path +people <- read.json(path) + +# The inferred schema can be visualized using the printSchema() method. +printSchema(people) +# root +# |-- age: long (nullable = true) +# |-- name: string (nullable = true) + +# Register this DataFrame as a table. +createOrReplaceTempView(people, "people") + +# SQL statements can be run by using the sql methods. +teenagers <- sql("SELECT name FROM people WHERE age >= 13 AND age <= 19") +# $example off:load_json_file$ + + +# $example on:hive_table$ +# enableHiveSupport defaults to TRUE +sparkR.session(enableHiveSupport = TRUE) +sql("CREATE TABLE IF NOT EXISTS src (key INT, value STRING)") +sql("LOAD DATA LOCAL INPATH 'examples/src/main/resources/kv1.txt' INTO TABLE src") + +# Queries can be expressed in HiveQL. +results <- collect(sql("FROM src SELECT key, value")) +# $example off:hive_table$ + + +# $example on:jdbc$ +df <- read.jdbc("jdbc:postgresql:dbserver", "schema.tablename", user = "username", password = "password") +# $example off:jdbc$ From 1af09f31fd506143e8fe45b530dd46e39df76d6b Mon Sep 17 00:00:00 2001 From: Xin Ren Date: Thu, 7 Jul 2016 17:48:04 -0700 Subject: [PATCH 02/11] [SPARK-16381] some fixes, more to come --- docs/sql-programming-guide.md | 2 +- examples/src/main/r/RSparkSQLExample.R | 17 ++++++++++------- 2 files changed, 11 insertions(+), 8 deletions(-) diff --git a/docs/sql-programming-guide.md b/docs/sql-programming-guide.md index 0fb04937071e..448251cfdc69 100644 --- a/docs/sql-programming-guide.md +++ b/docs/sql-programming-guide.md @@ -337,7 +337,7 @@ In addition to simple column references and expressions, DataFrames also have a
-{% include_example untyped_transformations r/RSparkSQLExample.R %} +{% include_example dataframe_operations r/RSparkSQLExample.R %} For a complete list of the types of operations that can be performed on a DataFrame refer to the [API Documentation](api/R/index.html). diff --git a/examples/src/main/r/RSparkSQLExample.R b/examples/src/main/r/RSparkSQLExample.R index e8260f001b8f..fd6b2d187b7a 100644 --- a/examples/src/main/r/RSparkSQLExample.R +++ b/examples/src/main/r/RSparkSQLExample.R @@ -24,16 +24,19 @@ sparkR.session() df <- read.json("examples/src/main/resources/people.json") # Displays the content of the DataFrame +head(df) + +# Another method to print the first few rows and optionally truncate the printing of long values showDF(df) # $example off:create_DataFrames$ -# $example on:untyped_transformations$ +# $example on:dataframe_operations$ # Create the DataFrame df <- read.json("examples/src/main/resources/people.json") # Show the content of the DataFrame -showDF(df) +head(df) ## age name ## null Michael ## 30 Andy @@ -46,31 +49,31 @@ printSchema(df) ## |-- name: string (nullable = true) # Select only the "name" column -showDF(select(df, "name")) +head(select(df, "name")) ## name ## Michael ## Andy ## Justin # Select everybody, but increment the age by 1 -showDF(select(df, df$name, df$age + 1)) +head(select(df, df$name, df$age + 1)) ## name (age + 1) ## Michael null ## Andy 31 ## Justin 20 # Select people older than 21 -showDF(where(df, df$age > 21)) +head(where(df, df$age > 21)) ## age name ## 30 Andy # Count people by age -showDF(count(groupBy(df, "age"))) +head(count(groupBy(df, "age"))) ## age count ## null 1 ## 19 1 ## 30 1 -# $example off:untyped_transformations$ +# $example off:dataframe_operations$ # $example on:sql_query$ From 9ac6a7049187d817a732193fad0e86993eddf197 Mon Sep 17 00:00:00 2001 From: Xin Ren Date: Thu, 7 Jul 2016 23:11:54 -0700 Subject: [PATCH 03/11] [SPARK-16381] make schema merge example runnable --- examples/src/main/r/RSparkSQLExample.R | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/examples/src/main/r/RSparkSQLExample.R b/examples/src/main/r/RSparkSQLExample.R index fd6b2d187b7a..357e2897b220 100644 --- a/examples/src/main/r/RSparkSQLExample.R +++ b/examples/src/main/r/RSparkSQLExample.R @@ -99,10 +99,10 @@ df <- sql("SELECT * FROM parquet.`examples/src/main/resources/users.parquet`") # $example on:load_programmatically$ -schemaPeople # The SparkDataFrame from the previous example. +df <- read.df("examples/src/main/resources/people.json", "json") # SparkDataFrame can be saved as Parquet files, maintaining the schema information. -write.parquet(schemaPeople, "people.parquet") +write.parquet(df, "people.parquet") # Read in the Parquet file created above. Parquet files are self-describing so the schema is preserved. # The result of loading a parquet file is also a DataFrame. @@ -120,6 +120,9 @@ for (teenName in collect(teenNames)$name) { # $example on:schema_merging$ +df1 <- createDataFrame(data.frame(single=c(12, 29), double=c(19, 23))) +df2 <- createDataFrame(data.frame(double=c(19, 23), triple=c(23, 18))) + # Create a simple DataFrame, stored into a partition directory write.df(df1, "data/test_table/key=1", "parquet", "overwrite") @@ -134,9 +137,9 @@ printSchema(df3) # The final schema consists of all 3 columns in the Parquet files together # with the partitioning column appeared in the partition directory paths. 
# root -# |-- single: int (nullable = true) -# |-- double: int (nullable = true) -# |-- triple: int (nullable = true) +# |-- single: double (nullable = true) +# |-- double: double (nullable = true) +# |-- triple: double (nullable = true) # |-- key : int (nullable = true) # $example off:schema_merging$ @@ -159,6 +162,9 @@ createOrReplaceTempView(people, "people") # SQL statements can be run by using the sql methods. teenagers <- sql("SELECT name FROM people WHERE age >= 13 AND age <= 19") +head(teenagers) +## name +## 1 Justin # $example off:load_json_file$ From 05ee46bc46ddcb6855ab85ea79f256b1d6d27b90 Mon Sep 17 00:00:00 2001 From: Xin Ren Date: Thu, 7 Jul 2016 23:17:27 -0700 Subject: [PATCH 04/11] [SPARK-16381] make sql_query example runnable --- examples/src/main/r/RSparkSQLExample.R | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/examples/src/main/r/RSparkSQLExample.R b/examples/src/main/r/RSparkSQLExample.R index 357e2897b220..cf96e9adbf0c 100644 --- a/examples/src/main/r/RSparkSQLExample.R +++ b/examples/src/main/r/RSparkSQLExample.R @@ -76,6 +76,11 @@ head(count(groupBy(df, "age"))) # $example off:dataframe_operations$ +# Create a DataFrame from json file +path <- file.path(Sys.getenv("SPARK_HOME"), "examples/src/main/resources/people.json") +peopleDF <- read.json(path) +# Register this DataFrame as a table. +createOrReplaceTempView(peopleDF, "table") # $example on:sql_query$ df <- sql("SELECT * FROM table") # $example off:sql_query$ From 828b2cf7d37684b8cb05803b10e41264adc4c926 Mon Sep 17 00:00:00 2001 From: Xin Ren Date: Thu, 7 Jul 2016 23:45:12 -0700 Subject: [PATCH 05/11] [SPARK-16381] make load_programmatically example runnable --- examples/src/main/r/RSparkSQLExample.R | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/examples/src/main/r/RSparkSQLExample.R b/examples/src/main/r/RSparkSQLExample.R index cf96e9adbf0c..a931f0ab51ee 100644 --- a/examples/src/main/r/RSparkSQLExample.R +++ b/examples/src/main/r/RSparkSQLExample.R @@ -15,6 +15,8 @@ # limitations under the License. # +library(SparkR) + # $example on:init_session$ sparkR.session() # $example off:init_session$ @@ -116,11 +118,18 @@ parquetFile <- read.parquet("people.parquet") # Parquet files can also be used to create a temporary view and then used in SQL statements. 
createOrReplaceTempView(parquetFile, "parquetFile")
teenagers <- sql("SELECT name FROM parquetFile WHERE age >= 13 AND age <= 19")
+head(teenagers)
+## name
+## 1 Justin
+
schema <- structType(structField("name", "string"))
teenNames <- dapply(df, function(p) { cbind(paste("Name:", p$name)) }, schema)
for (teenName in collect(teenNames)$name) {
  cat(teenName, "\n")
}
+## Name: Michael
+## Name: Andy
+## Name: Justin
# $example off:load_programmatically$

From 7dca42dfd87597db8aa15cf1c32868baecbfd99e Mon Sep 17 00:00:00 2001
From: Xin Ren
Date: Thu, 7 Jul 2016 23:47:41 -0700
Subject: [PATCH 06/11] [SPARK-16381] replace last showDF()

---
 examples/src/main/r/RSparkSQLExample.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/examples/src/main/r/RSparkSQLExample.R b/examples/src/main/r/RSparkSQLExample.R
index a931f0ab51ee..802fa194fb94 100644
--- a/examples/src/main/r/RSparkSQLExample.R
+++ b/examples/src/main/r/RSparkSQLExample.R
@@ -29,7 +29,7 @@ df <- read.json("examples/src/main/resources/people.json")
head(df)

# Another method to print the first few rows and optionally truncate the printing of long values
-showDF(df)
+head(df)
# $example off:create_DataFrames$

From cd184b31d9fe7f57c6690e8a69f77b64f3ec9228 Mon Sep 17 00:00:00 2001
From: Xin Ren
Date: Fri, 8 Jul 2016 11:51:56 -0700
Subject: [PATCH 07/11] [SPARK-16381] minor fix

---
 examples/src/main/r/RSparkSQLExample.R | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/examples/src/main/r/RSparkSQLExample.R b/examples/src/main/r/RSparkSQLExample.R
index 802fa194fb94..868d5482c4cb 100644
--- a/examples/src/main/r/RSparkSQLExample.R
+++ b/examples/src/main/r/RSparkSQLExample.R
@@ -18,7 +18,7 @@ library(SparkR)

# $example on:init_session$
-sparkR.session()
+sparkR.session(appName='MyApp', sparkConfig=list(spark.executor.memory="1g"))
# $example off:init_session$

@@ -29,7 +29,7 @@ df <- read.json("examples/src/main/resources/people.json")
head(df)

# Another method to print the first few rows and optionally truncate the printing of long values
-head(df)
+showDF(df)
# $example off:create_DataFrames$

From 5e95fdd327efd7edbbecf30f3346b486ad86bbf8 Mon Sep 17 00:00:00 2001
From: Xin Ren
Date: Fri, 8 Jul 2016 12:01:05 -0700
Subject: [PATCH 08/11] [SPARK-16381] make it verbose

---
 examples/src/main/r/RSparkSQLExample.R | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/examples/src/main/r/RSparkSQLExample.R b/examples/src/main/r/RSparkSQLExample.R
index 868d5482c4cb..1fe6f219e1ca 100644
--- a/examples/src/main/r/RSparkSQLExample.R
+++ b/examples/src/main/r/RSparkSQLExample.R
@@ -96,7 +96,8 @@ write.df(select(df, "name", "favorite_color"), "namesAndFavColors.parquet")

# $example on:source_json$
df <- read.df("examples/src/main/resources/people.json", "json")
-write.df(select(df, "name", "age"), "namesAndAges.parquet", "parquet")
+namesAndAges <- select(df, "name", "age")
+write.df(namesAndAges, "namesAndAges.parquet", "parquet")
# $example off:source_json$

From d5b0b7f111a28c63ca6e501ff0017af64881f0b4 Mon Sep 17 00:00:00 2001
From: Xin Ren
Date: Fri, 8 Jul 2016 12:10:39 -0700
Subject: [PATCH 09/11] [SPARK-16381] remove code duplicate etc

---
 examples/src/main/r/RSparkSQLExample.R | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/examples/src/main/r/RSparkSQLExample.R b/examples/src/main/r/RSparkSQLExample.R
index 1fe6f219e1ca..c339273a6f93 100644
--- a/examples/src/main/r/RSparkSQLExample.R
+++ b/examples/src/main/r/RSparkSQLExample.R
@@ -78,11 +78,8 @@ head(count(groupBy(df, "age")))
# $example off:dataframe_operations$

-# Create a DataFrame from json file
-path <- file.path(Sys.getenv("SPARK_HOME"), "examples/src/main/resources/people.json")
-peopleDF <- read.json(path)
# Register this DataFrame as a table.
-createOrReplaceTempView(peopleDF, "table")
+createOrReplaceTempView(df, "table")
# $example on:sql_query$
df <- sql("SELECT * FROM table")
# $example off:sql_query$
@@ -123,6 +120,7 @@ head(teenagers)
## name
## 1 Justin

+# We can also run custom R-UDFs on Spark DataFrames. Here we prefix all the names with "Name:"
schema <- structType(structField("name", "string"))
teenNames <- dapply(df, function(p) { cbind(paste("Name:", p$name)) }, schema)
for (teenName in collect(teenNames)$name) {

From a1eca2bc5f038e3966de87fdbce35f42cee4dd32 Mon Sep 17 00:00:00 2001
From: Xin Ren
Date: Fri, 8 Jul 2016 16:06:28 -0700
Subject: [PATCH 10/11] [SPARK-16381] style fix

---
 examples/src/main/r/RSparkSQLExample.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/examples/src/main/r/RSparkSQLExample.R b/examples/src/main/r/RSparkSQLExample.R
index c339273a6f93..eba3f1b91e2d 100644
--- a/examples/src/main/r/RSparkSQLExample.R
+++ b/examples/src/main/r/RSparkSQLExample.R
@@ -18,7 +18,7 @@ library(SparkR)

# $example on:init_session$
-sparkR.session(appName='MyApp', sparkConfig=list(spark.executor.memory="1g"))
+sparkR.session(appName = "MyApp", sparkConfig = list(spark.executor.memory = "1g"))
# $example off:init_session$

From 7195750788d1526f77eea1345f7c0cf5431aca05 Mon Sep 17 00:00:00 2001
From: Xin Ren
Date: Fri, 8 Jul 2016 16:53:28 -0700
Subject: [PATCH 11/11] [SPARK-16381] fix space style in other r examples

---
 examples/src/main/r/dataframe.R | 2 +-
 examples/src/main/r/ml.R        | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/examples/src/main/r/dataframe.R b/examples/src/main/r/dataframe.R
index a377d6e864d2..295f9b427622 100644
--- a/examples/src/main/r/dataframe.R
+++ b/examples/src/main/r/dataframe.R
@@ -18,7 +18,7 @@ library(SparkR)

# Initialize SparkSession
-sc <- sparkR.session(appName="SparkR-DataFrame-example")
+sc <- sparkR.session(appName = "SparkR-DataFrame-example")

# Create a simple local data.frame
localDF <- data.frame(name=c("John", "Smith", "Sarah"), age=c(19, 23, 18))

diff --git a/examples/src/main/r/ml.R b/examples/src/main/r/ml.R
index 940c98dcb97a..65242e68b3c8 100644
--- a/examples/src/main/r/ml.R
+++ b/examples/src/main/r/ml.R
@@ -22,7 +22,7 @@ library(SparkR)

# Initialize SparkSession
-sparkR.session(appName="SparkR-ML-example")
+sparkR.session(appName = "SparkR-ML-example")

# $example on$
############################ spark.glm and glm ##############################################