From b31d000fe64008891cc61aa4b57cdd26ccbd5aec Mon Sep 17 00:00:00 2001 From: Ben Lewis Date: Tue, 7 Feb 2017 15:14:06 +1100 Subject: [PATCH 1/8] Add regression tests for submodule dependence of pickled function, issue #78 --- tests/cloudpickle_test.py | 40 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) diff --git a/tests/cloudpickle_test.py b/tests/cloudpickle_test.py index a1dec1151..670bc812e 100644 --- a/tests/cloudpickle_test.py +++ b/tests/cloudpickle_test.py @@ -360,6 +360,46 @@ def f(): self.assertTrue(f2 is f3) self.assertEqual(f2(), res) + def test_submodule(self): + # Function that refers (by attribute) to a sub-module of a package. + + # Choose any module NOT imported by __init__ of the parent package + # examples in standard library include http.cookies and unittest.mock + global http # imitate performing this import at top of file + import http.cookies + def example(): + x = http.HTTPStatus + x = http.cookies.Morsel # potential AttributeError + + s = cloudpickle.dumps(example) + + # refresh the environment (unimport http.cookies) + del http + del sys.modules['http.cookies'] + del sys.modules['http'] + + # deserialise + f = pickle.loads(s) + f() # perform test for error + + def test_submodule_closure(self): + # Same as test_submodule except the package is not a global + def scope(): + import http.cookies + def example(): + x = http.HTTPStatus + x = http.cookies.Morsel # potential AttributeError + return example + example = scope() + + s = cloudpickle.dumps(example) + + # refresh the environment (unimport http.cookies) + del sys.modules['http.cookies'] + del sys.modules['http'] + + f = cloudpickle.loads(s) + f() # test if __name__ == '__main__': unittest.main() From 0657c6d06fe93ed5cf4fcd0a5037b75b7817696e Mon Sep 17 00:00:00 2001 From: Ben Lewis Date: Tue, 7 Feb 2017 15:53:06 +1100 Subject: [PATCH 2/8] save submodules to resolve issue #78 --- cloudpickle/cloudpickle.py | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/cloudpickle/cloudpickle.py b/cloudpickle/cloudpickle.py index e8f4223b2..ddc7696ea 100644 --- a/cloudpickle/cloudpickle.py +++ b/cloudpickle/cloudpickle.py @@ -238,7 +238,7 @@ def save_function(self, obj, name=None): # a builtin_function_or_method which comes in as an attribute of some # object (e.g., object.__new__, itertools.chain.from_iterable) will end # up with modname "__main__" and so end up here. But these functions - # have no __code__ attribute in CPython, so the handling for + # have no __code__ attribute in CPython, so the handling for # user-defined functions below will fail. # So we pickle them here using save_reduce; have to do it differently # for different python versions. @@ -282,6 +282,21 @@ def save_function(self, obj, name=None): self.memoize(obj) dispatch[types.FunctionType] = save_function + def _save_subimports(self, code, top_level_dependencies): + """Ensure de-pickler imports any package sub-modules needed by the function""" + # check if any known dependency is an imported package + for x in top_level_dependencies: + if isinstance(x, types.ModuleType) and x.__package__: + # check if the package has any currently loaded sub-imports + prefix = x.__name__ + '.' + for name, module in sys.modules.items(): + if name.startswith(prefix): + # check whether the function can address the sub-module + tokens = set(name[len(prefix):].split('.')) + if not tokens - set(code.co_names): + self.save(module) # ensure the unpickler executes import of this submodule + self.write(pickle.POP) # then discard the reference to it + def save_function_tuple(self, func): """ Pickles an actual func object. @@ -307,6 +322,8 @@ def save_function_tuple(self, func): save(_fill_function) # skeleton function updater write(pickle.MARK) # beginning of tuple that _fill_function expects + self._save_subimports(code, set(f_globals.values()) | set(closure)) + # create a skeleton function object and memoize it save(_make_skel_func) save((code, closure, base_globals)) From 09222731d465950e947180f9cf453ceee3c11ef0 Mon Sep 17 00:00:00 2001 From: Ben Lewis Date: Wed, 8 Feb 2017 10:27:40 +1100 Subject: [PATCH 3/8] change test example for python 2.6 support --- tests/cloudpickle_test.py | 32 +++++++++++++++++--------------- 1 file changed, 17 insertions(+), 15 deletions(-) diff --git a/tests/cloudpickle_test.py b/tests/cloudpickle_test.py index 670bc812e..f59dd20e0 100644 --- a/tests/cloudpickle_test.py +++ b/tests/cloudpickle_test.py @@ -364,19 +364,21 @@ def test_submodule(self): # Function that refers (by attribute) to a sub-module of a package. # Choose any module NOT imported by __init__ of the parent package - # examples in standard library include http.cookies and unittest.mock - global http # imitate performing this import at top of file - import http.cookies + # examples in standard library include: + # - http.cookies, unittest.mock, curses.textpad, xml.etree.ElementTree + + global xml # imitate performing this import at top of file + import xml.etree.ElementTree def example(): - x = http.HTTPStatus - x = http.cookies.Morsel # potential AttributeError + x = xml.etree.ElementTree.Comment # potential AttributeError s = cloudpickle.dumps(example) - # refresh the environment (unimport http.cookies) - del http - del sys.modules['http.cookies'] - del sys.modules['http'] + # refresh the environment (unimport the dependency) + del xml + del sys.modules['xml.etree.ElementTree'] + del sys.modules['xml.etree'] + del sys.modules['xml'] # deserialise f = pickle.loads(s) @@ -385,18 +387,18 @@ def example(): def test_submodule_closure(self): # Same as test_submodule except the package is not a global def scope(): - import http.cookies + import xml.etree.ElementTree def example(): - x = http.HTTPStatus - x = http.cookies.Morsel # potential AttributeError + x = xml.etree.ElementTree.Comment # potential AttributeError return example example = scope() s = cloudpickle.dumps(example) - # refresh the environment (unimport http.cookies) - del sys.modules['http.cookies'] - del sys.modules['http'] + # refresh the environment (unimport dependency) + del sys.modules['xml.etree.ElementTree'] + del sys.modules['xml.etree'] + del sys.modules['xml'] f = cloudpickle.loads(s) f() # test From 11709cb47fdca99b55c2bb2390cdfb8ca60001e5 Mon Sep 17 00:00:00 2001 From: Ben Lewis Date: Fri, 10 Feb 2017 13:56:23 +1100 Subject: [PATCH 4/8] fix test_submodule (incomplete un-import) --- tests/cloudpickle_test.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/tests/cloudpickle_test.py b/tests/cloudpickle_test.py index f59dd20e0..b71e356a8 100644 --- a/tests/cloudpickle_test.py +++ b/tests/cloudpickle_test.py @@ -363,7 +363,7 @@ def f(): def test_submodule(self): # Function that refers (by attribute) to a sub-module of a package. - # Choose any module NOT imported by __init__ of the parent package + # Choose any module NOT imported by __init__ of its parent package # examples in standard library include: # - http.cookies, unittest.mock, curses.textpad, xml.etree.ElementTree @@ -374,11 +374,11 @@ def example(): s = cloudpickle.dumps(example) - # refresh the environment (unimport the dependency) + # refresh the environment, i.e., unimport the dependency del xml - del sys.modules['xml.etree.ElementTree'] - del sys.modules['xml.etree'] - del sys.modules['xml'] + for item in list(sys.modules): + if item.split('.')[0] == 'xml': + del sys.modules[item] # deserialise f = pickle.loads(s) @@ -396,9 +396,9 @@ def example(): s = cloudpickle.dumps(example) # refresh the environment (unimport dependency) - del sys.modules['xml.etree.ElementTree'] - del sys.modules['xml.etree'] - del sys.modules['xml'] + for item in list(sys.modules): + if item.split('.')[0] == 'xml': + del sys.modules[item] f = cloudpickle.loads(s) f() # test From 6854eaaecc393c16248c973d50c652fd24131166 Mon Sep 17 00:00:00 2001 From: Ben Lewis Date: Fri, 10 Feb 2017 14:00:09 +1100 Subject: [PATCH 5/8] add subprocess test --- tests/cloudpickle_test.py | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/tests/cloudpickle_test.py b/tests/cloudpickle_test.py index b71e356a8..025c94ae0 100644 --- a/tests/cloudpickle_test.py +++ b/tests/cloudpickle_test.py @@ -9,6 +9,8 @@ import itertools import platform import textwrap +import base64 +import subprocess try: # try importing numpy and scipy. These are not hard dependencies and @@ -403,5 +405,27 @@ def example(): f = cloudpickle.loads(s) f() # test + def test_multiprocess(self): + # running a function pickled by another process (a la dask.distributed) + def scope(): + import curses.textpad + def example(): + x = xml.etree.ElementTree.Comment + x = curses.textpad.Textbox + return example + global xml + import xml.etree.ElementTree + example = scope() + + s = cloudpickle.dumps(example) + + # choose "subprocess" rather than "multiprocessing" because the latter + # library uses fork to preserve the parent environment. + command = ("import pickle, base64; " + "pickle.loads(base64.b32decode('" + + base64.b32encode(s).decode('ascii') + + "'))()") + assert not subprocess.call([sys.executable, '-c', command]) + if __name__ == '__main__': unittest.main() From ee40673a1ca82556335c7df0a95fb388987e196a Mon Sep 17 00:00:00 2001 From: Ben Lewis Date: Fri, 24 Feb 2017 14:38:35 +1100 Subject: [PATCH 6/8] cosmetic per ogrisel --- cloudpickle/cloudpickle.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/cloudpickle/cloudpickle.py b/cloudpickle/cloudpickle.py index ddc7696ea..fb40342e8 100644 --- a/cloudpickle/cloudpickle.py +++ b/cloudpickle/cloudpickle.py @@ -283,7 +283,10 @@ def save_function(self, obj, name=None): dispatch[types.FunctionType] = save_function def _save_subimports(self, code, top_level_dependencies): - """Ensure de-pickler imports any package sub-modules needed by the function""" + """ + Ensure de-pickler imports any package child-modules that + are needed by the function + """ # check if any known dependency is an imported package for x in top_level_dependencies: if isinstance(x, types.ModuleType) and x.__package__: @@ -294,8 +297,11 @@ def _save_subimports(self, code, top_level_dependencies): # check whether the function can address the sub-module tokens = set(name[len(prefix):].split('.')) if not tokens - set(code.co_names): - self.save(module) # ensure the unpickler executes import of this submodule - self.write(pickle.POP) # then discard the reference to it + # ensure unpickler executes this import + self.save(module) + # then discards the reference to it + self.write(pickle.POP) + def save_function_tuple(self, func): """ Pickles an actual func object. From 3319f50051f72437a12a65c646b9deebf7ef0d71 Mon Sep 17 00:00:00 2001 From: Ben Lewis Date: Fri, 24 Feb 2017 14:43:28 +1100 Subject: [PATCH 7/8] add test of function imports This test would already pass prior to the changes for supporting submodules. per ogrisel --- tests/cloudpickle_test.py | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/tests/cloudpickle_test.py b/tests/cloudpickle_test.py index 025c94ae0..90135cd6e 100644 --- a/tests/cloudpickle_test.py +++ b/tests/cloudpickle_test.py @@ -427,5 +427,24 @@ def example(): "'))()") assert not subprocess.call([sys.executable, '-c', command]) + def test_import(self): + # like test_multiprocess except subpackage modules referenced directly + # (unlike test_submodule) + global etree + import xml.etree.ElementTree as etree + import curses.textpad as foobar + def example(): + x = etree.Comment + x = foobar.Textbox + + s = cloudpickle.dumps(example) + + command = ("import pickle, base64; " + "pickle.loads(base64.b32decode('" + + base64.b32encode(s).decode('ascii') + + "'))()") + assert not subprocess.call([sys.executable, '-c', command]) + + if __name__ == '__main__': unittest.main() From 4b9bfbc9279da1ef03f9d11005eed6f39a65af6f Mon Sep 17 00:00:00 2001 From: Ben Lewis Date: Fri, 24 Feb 2017 14:58:55 +1100 Subject: [PATCH 8/8] strengthen import test --- tests/cloudpickle_test.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/tests/cloudpickle_test.py b/tests/cloudpickle_test.py index 90135cd6e..c540d5d4c 100644 --- a/tests/cloudpickle_test.py +++ b/tests/cloudpickle_test.py @@ -431,11 +431,14 @@ def test_import(self): # like test_multiprocess except subpackage modules referenced directly # (unlike test_submodule) global etree + def scope(): + import curses.textpad as foobar + def example(): + x = etree.Comment + x = foobar.Textbox + return example + example = scope() import xml.etree.ElementTree as etree - import curses.textpad as foobar - def example(): - x = etree.Comment - x = foobar.Textbox s = cloudpickle.dumps(example)