Skip to content

Commit 20936a5

Browse files
committed
Added tests and documentation
1 parent e4d21b4 commit 20936a5

File tree

2 files changed

+21
-1
lines changed

2 files changed

+21
-1
lines changed

project/SparkBuild.scala

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -522,7 +522,6 @@ object SparkBuild extends Build {
522522

523523
def extraAssemblySettings() = Seq(
524524
test in assembly := {},
525-
assemblyOption in assembly ~= { _.copy(cacheOutput = false) },
526525
mergeStrategy in assembly := {
527526
case m if m.toLowerCase.endsWith("manifest.mf") => MergeStrategy.discard
528527
case m if m.toLowerCase.matches("meta-inf.*\\.sf$") => MergeStrategy.discard

python/pyspark/rdd.py

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1388,13 +1388,30 @@ def _is_pipelinable(self):
13881388
return not (self.is_cached or self.is_checkpointed)
13891389

13901390
class Row(dict):
1391+
"""
1392+
An extended L{dict} that takes a L{dict} in its constructor, and exposes those items as fields.
1393+
1394+
>>> r = Row({"hello" : "world", "foo" : "bar"})
1395+
>>> r.hello
1396+
'world'
1397+
>>> r.foo
1398+
'bar'
1399+
"""
13911400

13921401
def __init__(self, d):
13931402
d.update(self.__dict__)
13941403
self.__dict__ = d
13951404
dict.__init__(self, d)
13961405

13971406
class SchemaRDD(RDD):
1407+
"""
1408+
An RDD of Row objects that has an associated schema. The underlying JVM object is a SchemaRDD,
1409+
not a PythonRDD, so we can utilize the relational query API exposed by SparkSQL.
1410+
1411+
For normal L{RDD} operations (map, count, etc.) the L{SchemaRDD} is not operated on directly, as
1412+
its underlying implementation is an RDD composed of Java objects. Instead it is converted to a
1413+
PythonRDD in the JVM, on which Python operations can be done.
1414+
"""
13981415

13991416
def __init__(self, jschema_rdd, sql_ctx):
14001417
self.sql_ctx = sql_ctx
@@ -1408,6 +1425,10 @@ def __init__(self, jschema_rdd, sql_ctx):
14081425

14091426
@property
14101427
def _jrdd(self):
1428+
"""
1429+
Lazy evaluation of the PythonRDD object. Only done when a user calls methods defined by the
1430+
L{RDD} super class (map, count, etc.).
1431+
"""
14111432
return self.toPython()._jrdd
14121433

14131434
@property

0 commit comments

Comments
 (0)