Add comment.

apache · May 4, 2019 · 04a2e04 · 04a2e04
1 parent d7312fb
commit 04a2e04
Showing 1 changed file with 4 additions and 0 deletions.
diff --git a/python/pyspark/sql/tests/test_serde.py b/python/pyspark/sql/tests/test_serde.py
@@ -128,6 +128,10 @@ def test_BinaryType_serialization(self):
 
     def test_int_array_serialization(self):
         # Note that this test seems dependent on parallelism.
+        # The underlying issue is that the internal object map in Pyrolite is not cleared
+        # after op code STOP. When Python objects are pickled with protocol 4, op code
+        # MEMOIZE stores objects in that map, so it must be cleared to make sure the next
+        # unpickling starts from a clean map.
         data = self.spark.sparkContext.parallelize([[1, 2, 3, 4]] * 100, numSlices=12)
         df = self.spark.createDataFrame(data, "array<integer>")
         self.assertEqual(len(list(filter(lambda r: None in r.value, df.collect()))), 0)