From 04a2e04f70474788448ebf0aaa8bb6670e5fa01b Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh Date: Sat, 4 May 2019 09:31:36 +0800 Subject: [PATCH] Add comment. --- python/pyspark/sql/tests/test_serde.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/python/pyspark/sql/tests/test_serde.py b/python/pyspark/sql/tests/test_serde.py index 1c18e930eb91d..ed4b9a7755879 100644 --- a/python/pyspark/sql/tests/test_serde.py +++ b/python/pyspark/sql/tests/test_serde.py @@ -128,6 +128,10 @@ def test_BinaryType_serialization(self): def test_int_array_serialization(self): # Note that this test seems dependent on parallelism. + # This issue occurs because the internal object map in Pyrolite is not cleared after op + # code STOP. If we use protocol 4 to pickle Python objects, op code MEMOIZE will store + # objects in the map. We need to clear it to make sure the next unpickling works on + # a clean map. data = self.spark.sparkContext.parallelize([[1, 2, 3, 4]] * 100, numSlices=12) df = self.spark.createDataFrame(data, "array") self.assertEqual(len(list(filter(lambda r: None in r.value, df.collect()))), 0)