@@ -174,7 +174,6 @@ def _ensure_initialized(cls, instance=None, gateway=None):
                 SparkContext._gateway = gateway or launch_gateway()
                 SparkContext._jvm = SparkContext._gateway.jvm
                 SparkContext._writeToFile = SparkContext._jvm.PythonRDD.writeToFile
-                SparkContext._pythonToJava = SparkContext._jvm.PythonRDD.pythonToJava
                 SparkContext._pythonToJavaMap = SparkContext._jvm.PythonRDD.pythonToJavaMap
                 SparkContext._javaToPython = SparkContext._jvm.PythonRDD.javaToPython
 
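
These class-level attributes are Py4J handles, cached once when the gateway is launched. The plain `pythonToJava` hook is dropped, presumably because the SQL path only needs the dict-aware converter. A minimal sketch of the surviving hook in use, assuming an already-initialized SparkContext `sc` (it mirrors the `_pythonToJavaMap` call in the hunk at old line 537 below; it is not part of this patch):

    # Sketch only: convert a pickled RDD of Python dicts into a JVM RDD of
    # java.util.Map objects, the form the Scala SQLContext can inspect.
    rdd = sc.parallelize([{"field1": 1, "field2": "row1"}])
    jrdd = sc._pythonToJavaMap(rdd._jrdd)  # class attribute cached above
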
@@ -481,21 +480,21 @@ def __init__(self, sparkContext):
         >>> rdd = sc.parallelize([{"field1" : 1, "field2" : "row1"},
         ... {"field1" : 2, "field2": "row2"}, {"field1" : 3, "field2": "row3"}])
 
-        >>> srdd = sqlCtx.applySchema(rdd)
-        >>> sqlCtx.applySchema(srdd) # doctest: +IGNORE_EXCEPTION_DETAIL
+        >>> srdd = sqlCtx.inferSchema(rdd)
+        >>> sqlCtx.inferSchema(srdd) # doctest: +IGNORE_EXCEPTION_DETAIL
         Traceback (most recent call last):
             ...
         ValueError:...
 
         >>> bad_rdd = sc.parallelize([1,2,3])
-        >>> sqlCtx.applySchema(bad_rdd) # doctest: +IGNORE_EXCEPTION_DETAIL
+        >>> sqlCtx.inferSchema(bad_rdd) # doctest: +IGNORE_EXCEPTION_DETAIL
         Traceback (most recent call last):
             ...
         ValueError:...
 
         >>> allTypes = sc.parallelize([{"int" : 1, "string" : "string", "double" : 1.0, "long": 1L,
         ... "boolean" : True}])
-        >>> srdd = sqlCtx.applySchema(allTypes).map(lambda x: (x.int, x.string, x.double, x.long,
+        >>> srdd = sqlCtx.inferSchema(allTypes).map(lambda x: (x.int, x.string, x.double, x.long,
         ... x.boolean))
         >>> srdd.collect()[0]
         (1, u'string', 1.0, 1, True)
@@ -514,7 +513,7 @@ def _ssql_ctx(self):
             self._scala_SQLContext = self._jvm.SQLContext(self._jsc.sc())
         return self._scala_SQLContext
 
-    def applySchema(self, rdd):
+    def inferSchema(self, rdd):
         """
         Infer and apply a schema to an RDD of L{dict}s. We peek at the first row of the RDD to
         determine the field names and types, and then use that to extract all the dictionaries.
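
The inference strategy the docstring describes (peek at the first row to derive field names and types) can be illustrated in plain Python. `peek_schema` is a hypothetical helper for illustration only, not a function in this patch:

    def peek_schema(first_row):
        # Derive sorted (field name, Python type name) pairs from one sample row.
        return sorted((name, type(value).__name__) for name, value in first_row.items())

    print(peek_schema({"field1": 1, "field2": "row1"}))
    # [('field1', 'int'), ('field2', 'str')]
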
@@ -523,7 +522,7 @@ def applySchema(self, rdd):
         >>> sqlCtx = SQLContext(sc)
         >>> rdd = sc.parallelize([{"field1" : 1, "field2" : "row1"},
         ... {"field1" : 2, "field2": "row2"}, {"field1" : 3, "field2": "row3"}])
-        >>> srdd = sqlCtx.applySchema(rdd)
+        >>> srdd = sqlCtx.inferSchema(rdd)
         >>> srdd.collect() == [{"field1" : 1, "field2" : "row1"}, {"field1" : 2, "field2": "row2"},
         ... {"field1" : 3, "field2": "row3"}]
         True
@@ -535,7 +534,7 @@ def applySchema(self, rdd):
                              (SchemaRDD.__name__, rdd.first()))
 
         jrdd = self._sc._pythonToJavaMap(rdd._jrdd)
-        srdd = self._ssql_ctx.applySchema(jrdd.rdd())
+        srdd = self._ssql_ctx.inferSchema(jrdd.rdd())
         return SchemaRDD(srdd, self)
 
     def registerRDDAsTable(self, rdd, tableName):
@@ -546,7 +545,7 @@ def registerRDDAsTable(self, rdd, tableName):
         >>> sqlCtx = SQLContext(sc)
         >>> rdd = sc.parallelize([{"field1" : 1, "field2" : "row1"},
         ... {"field1" : 2, "field2": "row2"}, {"field1" : 3, "field2": "row3"}])
-        >>> srdd = sqlCtx.applySchema(rdd)
+        >>> srdd = sqlCtx.inferSchema(rdd)
         >>> sqlCtx.registerRDDAsTable(srdd, "table1")
         """
         if (rdd.__class__ is SchemaRDD):
@@ -563,7 +562,7 @@ def parquetFile(self, path):
         >>> sqlCtx = SQLContext(sc)
         >>> rdd = sc.parallelize([{"field1" : 1, "field2" : "row1"},
         ... {"field1" : 2, "field2": "row2"}, {"field1" : 3, "field2": "row3"}])
-        >>> srdd = sqlCtx.applySchema(rdd)
+        >>> srdd = sqlCtx.inferSchema(rdd)
         >>> srdd.saveAsParquetFile("/tmp/tmp.parquet")
         >>> srdd2 = sqlCtx.parquetFile("/tmp/tmp.parquet")
         >>> srdd.collect() == srdd2.collect()
@@ -580,7 +579,7 @@ def sql(self, sqlQuery):
         >>> sqlCtx = SQLContext(sc)
         >>> rdd = sc.parallelize([{"field1" : 1, "field2" : "row1"},
         ... {"field1" : 2, "field2": "row2"}, {"field1" : 3, "field2": "row3"}])
-        >>> srdd = sqlCtx.applySchema(rdd)
+        >>> srdd = sqlCtx.inferSchema(rdd)
         >>> sqlCtx.registerRDDAsTable(srdd, "table1")
         >>> srdd2 = sqlCtx.sql("SELECT field1 AS f1, field2 as f2 from table1")
         >>> srdd2.collect() == [{"f1" : 1, "f2" : "row1"}, {"f1" : 2, "f2": "row2"},
@@ -596,7 +595,7 @@ def table(self, tableName):
         >>> sqlCtx = SQLContext(sc)
         >>> rdd = sc.parallelize([{"field1" : 1, "field2" : "row1"},
         ... {"field1" : 2, "field2": "row2"}, {"field1" : 3, "field2": "row3"}])
-        >>> srdd = sqlCtx.applySchema(rdd)
+        >>> srdd = sqlCtx.inferSchema(rdd)
         >>> sqlCtx.registerRDDAsTable(srdd, "table1")
         >>> srdd2 = sqlCtx.table("table1")
         >>> srdd.collect() == srdd2.collect()
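
Taken together, the renamed entry point composes with the rest of the API like this (a usage sketch assembled from the doctests above, assuming a live `sc` and `sqlCtx`):

    rdd = sc.parallelize([{"field1": 1, "field2": "row1"},
                          {"field1": 2, "field2": "row2"}])
    srdd = sqlCtx.inferSchema(rdd)   # schema inferred by peeking at the first dict
    sqlCtx.registerRDDAsTable(srdd, "table1")
    srdd2 = sqlCtx.sql("SELECT field1 AS f1, field2 AS f2 FROM table1")
    srdd2.collect()   # [{u'f1': 1, u'f2': u'row1'}, {u'f1': 2, u'f2': u'row2'}]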