Skip to content

Commit 1836944

Browse files
marmbrus authored and ahirreddy committed
Fix comments.
1 parent e00980f commit 1836944

File tree

1 file changed

+9
-4
lines changed

1 file changed

+9
-4
lines changed

docs/sql-programming-guide.md

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -210,14 +210,19 @@ row. Any RDD of dictionaries can be converted to a SchemaRDD and then registered as
210210
can be used in subsequent SQL statements.
211211

212212
{% highlight python %}
213+
# Load a text file and convert each line to a dictionary.
213214
lines = sc.textFile("examples/src/main/resources/people.txt")
214215
parts = lines.map(lambda l: l.split(","))
215216
people = parts.map(lambda p: {"name": p[0], "age": int(p[1])})
216217

218+
# Infer the schema, and register the SchemaRDD as a table.
217219
peopleTable = sqlCtx.inferSchema(people)
218220
peopleTable.registerAsTable("people")
219221

222+
# SQL can be run over SchemaRDDs that have been registered as a table.
220223
teenagers = sqlCtx.sql("SELECT name FROM people WHERE age >= 13 AND age <= 19")
224+
225+
# The results of SQL queries are RDDs and support all the normal RDD operations.
221226
teenNames = teenagers.map(lambda p: "Name: " + p.name)
222227
{% endhighlight %}
223228

@@ -291,11 +296,11 @@ peopleTable # The SchemaRDD from the previous example.
291296
# SchemaRDDs can be saved as parquet files, maintaining the schema information.
292297
peopleTable.saveAsParquetFile("people.parquet")
293298

294-
// Read in the parquet file created above. Parquet files are self-describing so the schema is preserved.
295-
// The result of loading a parquet file is also a JavaSchemaRDD.
299+
# Read in the parquet file created above. Parquet files are self-describing so the schema is preserved.
300+
# The result of loading a parquet file is also a SchemaRDD.
296301
parquetFile = sqlCtx.parquetFile("people.parquet")
297302

298-
//Parquet files can also be registered as tables and then used in SQL statements.
303+
# Parquet files can also be registered as tables and then used in SQL statements.
299304
parquetFile.registerAsTable("parquetFile");
300305
teenagers = sqlCtx.sql("SELECT name FROM parquetFile WHERE age >= 13 AND age <= 19")
301306

@@ -401,7 +406,7 @@ hiveCtx = HiveContext(sqlCtx)
401406
hiveCtx.hql("CREATE TABLE IF NOT EXISTS src (key INT, value STRING)")
402407
hiveCtx.hql("LOAD DATA LOCAL INPATH 'examples/src/main/resources/kv1.txt' INTO TABLE src")
403408

404-
// Queries are expressed in HiveQL.
409+
# Queries can be expressed in HiveQL.
405410
results = hiveCtx.hql("FROM src SELECT key, value").collect()
406411

407412
{% endhighlight %}

0 commit comments

Comments
 (0)