Skip to content

Commit

Permalink
Global variables handling in dynamically defined functions. (#205)
Browse files Browse the repository at this point in the history
  • Loading branch information
ogrisel authored Sep 18, 2018
1 parent 5c781be commit 1d73b39
Show file tree
Hide file tree
Showing 3 changed files with 164 additions and 2 deletions.
5 changes: 5 additions & 0 deletions CHANGES.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,11 @@
master
======

- Ensure that unpickling a function defined in a dynamic module several times
sequentially does not reset the values of global variables.
([issue #187](https://github.com/cloudpipe/cloudpickle/issues/205))


0.5.6
=====

Expand Down
25 changes: 23 additions & 2 deletions cloudpickle/cloudpickle.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,22 @@
PY3 = True


# Container for the global namespace to ensure consistent unpickling of
# functions defined in dynamic modules (modules not registed in sys.modules).
_dynamic_modules_globals = weakref.WeakValueDictionary()


class _DynamicModuleFuncGlobals(dict):
"""Global variables referenced by a function defined in a dynamic module
To avoid leaking references we store such context in a WeakValueDictionary
instance. However instances of python builtin types such as dict cannot
be used directly as values in such a construct, hence the need for a
derived class.
"""
pass


def _make_cell_set_template_code():
"""Get the Python compiler to emit LOAD_FAST(arg); STORE_DEREF
Expand Down Expand Up @@ -1090,12 +1106,17 @@ def _make_skel_func(code, cell_count, base_globals=None):
if base_globals is None:
base_globals = {}
elif isinstance(base_globals, str):
base_globals_name = base_globals
if sys.modules.get(base_globals, None) is not None:
# this checks if we can import the previous environment the object
# This checks if we can import the previous environment the object
# lived in
base_globals = vars(sys.modules[base_globals])
else:
base_globals = {}
base_globals = _dynamic_modules_globals.get(
base_globals_name, None)
if base_globals is None:
base_globals = _DynamicModuleFuncGlobals()
_dynamic_modules_globals[base_globals_name] = base_globals

base_globals['__builtins__'] = __builtins__

Expand Down
136 changes: 136 additions & 0 deletions tests/cloudpickle_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import collections
import base64
import functools
import gc
import imp
from io import BytesIO
import itertools
Expand Down Expand Up @@ -44,6 +45,7 @@

import cloudpickle
from cloudpickle.cloudpickle import _find_module, _make_empty_cell, cell_set
from cloudpickle.cloudpickle import _dynamic_modules_globals

from .testutils import subprocess_pickle_echo
from .testutils import assert_run_python_script
Expand Down Expand Up @@ -441,6 +443,59 @@ def method(self, x):
mod1, mod2 = pickle_depickle([mod, mod])
self.assertEqual(id(mod1), id(mod2))

def test_dynamic_modules_globals(self):
# _dynamic_modules_globals is a WeakValueDictionary, so if a value
# in this dict (containing a set of global variables from a dynamic
# module created in the parent process) has no other reference than in
# this dict in the child process, it will be garbage collected.

# We first create a module
mod = imp.new_module('mod')
code = '''
x = 1
def func():
return
'''
exec(textwrap.dedent(code), mod.__dict__)

pickled_module_path = 'mod_f.pkl'

child_process_script = '''
import pickle
from cloudpickle.cloudpickle import _dynamic_modules_globals
import gc
with open("{pickled_module_path}", 'rb') as f:
func = pickle.load(f)
# A dictionnary storing the globals of the newly unpickled function
# should have been created
assert list(_dynamic_modules_globals.keys()) == ['mod']
# func.__globals__ is the only non-weak reference to
# _dynamic_modules_globals['mod']. By deleting func, we delete also
# _dynamic_modules_globals['mod']
del func
gc.collect()
# There is no reference to the globals of func since func has been
# deleted and _dynamic_modules_globals is a WeakValueDictionary,
# so _dynamic_modules_globals should now be empty
assert list(_dynamic_modules_globals.keys()) == []
'''

child_process_script = child_process_script.format(
pickled_module_path=pickled_module_path)

try:
with open(pickled_module_path, 'wb') as f:
cloudpickle.dump(mod.func, f)

assert_run_python_script(textwrap.dedent(child_process_script))

finally:
os.unlink(pickled_module_path)


def test_load_dynamic_module_in_grandchild_process(self):
# Make sure that when loaded, a dynamic module preserves its dynamic
# property. Otherwise, this will lead to an ImportError if pickled in
Expand Down Expand Up @@ -1018,6 +1073,87 @@ def f1():
finally:
_TEST_GLOBAL_VARIABLE = orig_value

def test_function_from_dynamic_module_with_globals_modifications(self):
# This test verifies that the global variable state of a function
# defined in a dynamic module in a child process are not reset by
# subsequent uplickling.

# first, we create a dynamic module in the parent process
mod = imp.new_module('mod')
code = '''
GLOBAL_STATE = "initial value"
def func_defined_in_dynamic_module(v=None):
global GLOBAL_STATE
if v is not None:
GLOBAL_STATE = v
return GLOBAL_STATE
'''
exec(textwrap.dedent(code), mod.__dict__)

try:
# Simple sanity check on the function's output
assert mod.func_defined_in_dynamic_module() == "initial value"

# The function of mod is pickled two times, with two different
# values for the global variable GLOBAL_STATE.
# Then we launch a child process that sequentially unpickles the
# two functions. Those unpickle functions should share the same
# global variables in the child process:
# Once the first function gets unpickled, mod is created and
# tracked in the child environment. This is state is preserved
# when unpickling the second function whatever the global variable
# GLOBAL_STATE's value at the time of pickling.

with open('function_with_initial_globals.pkl', 'wb') as f:
cloudpickle.dump(mod.func_defined_in_dynamic_module, f)

# Change the mod's global variable
mod.GLOBAL_STATE = 'changed value'

# At this point, mod.func_defined_in_dynamic_module()
# returns the updated value. Let's pickle it again.
assert mod.func_defined_in_dynamic_module() == 'changed value'
with open('function_with_modified_globals.pkl', 'wb') as f:
cloudpickle.dump(mod.func_defined_in_dynamic_module, f)

child_process_code = """
import pickle
with open('function_with_initial_globals.pkl','rb') as f:
func_with_initial_globals = pickle.load(f)
# At this point, a module called 'mod' should exist in
# _dynamic_modules_globals. Further function loading
# will use the globals living in mod.
assert func_with_initial_globals() == 'initial value'
# Load a function with initial global variable that was
# pickled after a change in the global variable
with open('function_with_modified_globals.pkl','rb') as f:
func_with_modified_globals = pickle.load(f)
# assert the this unpickling did not modify the value of
# the local
assert func_with_modified_globals() == 'initial value'
# Update the value from the child process and check that
# unpickling again does not reset our change.
assert func_with_initial_globals('new value') == 'new value'
assert func_with_modified_globals() == 'new value'
with open('function_with_initial_globals.pkl','rb') as f:
func_with_initial_globals = pickle.load(f)
assert func_with_initial_globals() == 'new value'
assert func_with_modified_globals() == 'new value'
"""
assert_run_python_script(textwrap.dedent(child_process_code))

finally:
os.unlink('function_with_initial_globals.pkl')
os.unlink('function_with_modified_globals.pkl')

@pytest.mark.skipif(sys.version_info >= (3, 0),
reason="hardcoded pickle bytes for 2.7")
def test_function_pickle_compat_0_4_0(self):
Expand Down

0 comments on commit 1d73b39

Please sign in to comment.