|
33 | 33 | [0, 2, 4, 6, 8, 10, 12, 14, 16, 18] |
34 | 34 | >>> sc.stop() |
35 | 35 |
|
36 | | -PySpark serialize objects in batches; By default, the batch size is chosen based |
37 | | -on the size of objects, also configurable by SparkContext's C{batchSize} parameter: |
| 36 | +PySpark serializes objects in batches; by default, the batch size is chosen based |
| 37 | +on the size of objects and is also configurable by SparkContext's C{batchSize} |
| 38 | +parameter: |
38 | 39 |
|
39 | 40 | >>> sc = SparkContext('local', 'test', batchSize=2) |
40 | 41 | >>> rdd = sc.parallelize(range(16), 4).map(lambda x: x) |
@@ -100,7 +101,7 @@ def load_stream(self, stream): |
100 | 101 | def _load_stream_without_unbatching(self, stream): |
101 | 102 | """ |
102 | 103 | Return an iterator of deserialized batches (iterable) of objects from the input stream. |
103 | | - if the serializer does not operate on batches the default implementation returns an |
| 104 | + If the serializer does not operate on batches the default implementation returns an |
104 | 105 | iterator of single element lists. |
105 | 106 | """ |
106 | 107 | return map(lambda x: [x], self.load_stream(stream)) |
@@ -461,7 +462,7 @@ def dumps(self, obj): |
461 | 462 | return obj |
462 | 463 |
|
463 | 464 |
|
464 | | -# Hook namedtuple, make it picklable |
| 465 | +# Hack namedtuple, make it picklable |
465 | 466 |
|
466 | 467 | __cls = {} |
467 | 468 |
|
@@ -525,15 +526,15 @@ def namedtuple(*args, **kwargs): |
525 | 526 | cls = _old_namedtuple(*args, **kwargs) |
526 | 527 | return _hack_namedtuple(cls) |
527 | 528 |
|
528 | | - # replace namedtuple with new one |
| 529 | + # replace namedtuple with the new one |
529 | 530 | collections.namedtuple.__globals__["_old_namedtuple_kwdefaults"] = _old_namedtuple_kwdefaults |
530 | 531 | collections.namedtuple.__globals__["_old_namedtuple"] = _old_namedtuple |
531 | 532 | collections.namedtuple.__globals__["_hack_namedtuple"] = _hack_namedtuple |
532 | 533 | collections.namedtuple.__code__ = namedtuple.__code__ |
533 | 534 | collections.namedtuple.__hijack = 1 |
534 | 535 |
|
535 | | - # hack the cls already generated by namedtuple |
536 | | - # those created in other module can be pickled as normal, |
| 536 | + # hack the cls already generated by namedtuple. |
| 537 | + # Those created in other modules can be pickled as normal, |
537 | 538 | # so only hack those in __main__ module |
538 | 539 | for n, o in sys.modules["__main__"].__dict__.items(): |
539 | 540 | if (type(o) is type and o.__base__ is tuple |
@@ -627,7 +628,7 @@ def loads(self, obj): |
627 | 628 | elif _type == b'P': |
628 | 629 | return pickle.loads(obj[1:]) |
629 | 630 | else: |
630 | | - raise ValueError("invalid sevialization type: %s" % _type) |
| 631 | + raise ValueError("invalid serialization type: %s" % _type) |
631 | 632 |
|
632 | 633 |
|
633 | 634 | class CompressedSerializer(FramedSerializer): |
|
0 commit comments