From fce797e68c3c65761a9cab48fc81ada30fbd5950 Mon Sep 17 00:00:00 2001 From: anivegesana Date: Sat, 11 Dec 2021 09:17:16 -0500 Subject: [PATCH 01/46] Fix #229 --- dill/_dill.py | 28 ++++++++++++++++++++++------ 1 file changed, 22 insertions(+), 6 deletions(-) diff --git a/dill/_dill.py b/dill/_dill.py index 9c1813c9..be66ec66 100644 --- a/dill/_dill.py +++ b/dill/_dill.py @@ -481,6 +481,7 @@ def __init__(self, *args, **kwds): self._strictio = False #_strictio self._fmode = settings['fmode'] if _fmode is None else _fmode self._recurse = settings['recurse'] if _recurse is None else _recurse + self._recursive_cells = {} def dump(self, obj): #NOTE: if settings change, need to update attributes stack.clear() # clear record of 'recursion-sensitive' pickled objects @@ -1298,6 +1299,11 @@ def save_wrapper_descriptor(pickler, obj): def save_cell(pickler, obj): log.info("Ce: %s" % obj) f = obj.cell_contents + if is_dill(pickler, child=True): + recursive_cells = pickler._recursive_cells.get(f) + if recursive_cells is not None: + recursive_cells.append(obj) + f = None pickler.save_reduce(_create_cell, (f,), obj=obj) log.info("# Ce") return @@ -1470,9 +1476,10 @@ def save_type(pickler, obj): log.info("# T6") return elif obj.__module__ == '__main__': + pickler_is_dill = is_dill(pickler, child=True) if issubclass(type(obj), type): # try: # used when pickling the class as code (or the interpreter) - if is_dill(pickler, child=True) and not pickler._byref: + if pickler_is_dill and not pickler._byref and obj not in pickler._recursive_cells: # thanks to Tom Stepleton pointing out pickler._session unneeded _t = 'T2' log.info("%s: %s" % (_t, obj)) @@ -1493,8 +1500,20 @@ def save_type(pickler, obj): #print ("%s\n%s" % (obj.__bases__, obj.__dict__)) for name in _dict.get("__slots__", []): del _dict[name] - pickler.save_reduce(_create_type, (type(obj), obj.__name__, + if pickler_is_dill: + pickler._recursive_cells[obj] = [] + name = getattr(obj, "__qualname__", obj.__name__) + pickler.save_reduce(_create_type, (type(obj), name, obj.__bases__, _dict), obj=obj) + if pickler_is_dill: + recursive_cells = pickler._recursive_cells.pop(obj) + for t in recursive_cells: + pickler.save_reduce(setattr, (t, 'cell_contents', obj)) + # pop None off created by setattr off stack + if PY3: + pickler.write(bytes('0', 'UTF-8')) + else: + pickler.write('0') log.info("# %s" % _t) # special cases: NoneType, NotImplementedType, EllipsisType elif obj is type(None): @@ -1570,7 +1589,6 @@ def save_function(pickler, obj): if PY3: #NOTE: workaround for 'super' (see issue #75) _super = ('super' in getattr(obj.__code__,'co_names',())) and (_byref is not None) - if _super: pickler._byref = True if _memo: pickler._recurse = False fkwdefaults = getattr(obj, '__kwdefaults__', None) pickler.save_reduce(_create_function, (obj.__code__, @@ -1579,18 +1597,16 @@ def save_function(pickler, obj): obj.__dict__, fkwdefaults), obj=obj) else: _super = ('super' in getattr(obj.func_code,'co_names',())) and (_byref is not None) and getattr(pickler, '_recurse', False) - if _super: pickler._byref = True if _memo: pickler._recurse = False pickler.save_reduce(_create_function, (obj.func_code, globs, obj.func_name, obj.func_defaults, obj.func_closure, obj.__dict__), obj=obj) - if _super: pickler._byref = _byref if _memo: pickler._recurse = _recurse #clear = (_byref, _super, _recurse, _memo) #print(clear + (OLDER,)) #NOTE: workaround for #234; "partial" still is problematic for recurse - if OLDER and not _byref and (_super or (not _super and _memo) or (not _super and not _memo and _recurse)): pickler.clear_memo() + if OLDER and not _byref and (_memo or (not _memo and _recurse)): pickler.clear_memo() #if _memo: # stack.remove(id(obj)) # #pickler.clear_memo() From ec190eadb0c0eaff80811aef891803e607146022 Mon Sep 17 00:00:00 2001 From: anivegesana Date: Sat, 11 Dec 2021 18:02:01 -0500 Subject: [PATCH 02/46] .get still fails when index is unhashable --- dill/_dill.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/dill/_dill.py b/dill/_dill.py index be66ec66..bfd60bf1 100644 --- a/dill/_dill.py +++ b/dill/_dill.py @@ -1300,7 +1300,7 @@ def save_cell(pickler, obj): log.info("Ce: %s" % obj) f = obj.cell_contents if is_dill(pickler, child=True): - recursive_cells = pickler._recursive_cells.get(f) + recursive_cells = pickler._recursive_cells.get(id(f)) if recursive_cells is not None: recursive_cells.append(obj) f = None @@ -1479,7 +1479,7 @@ def save_type(pickler, obj): pickler_is_dill = is_dill(pickler, child=True) if issubclass(type(obj), type): # try: # used when pickling the class as code (or the interpreter) - if pickler_is_dill and not pickler._byref and obj not in pickler._recursive_cells: + if pickler_is_dill and not pickler._byref and id(obj) not in pickler._recursive_cells: # thanks to Tom Stepleton pointing out pickler._session unneeded _t = 'T2' log.info("%s: %s" % (_t, obj)) @@ -1501,12 +1501,12 @@ def save_type(pickler, obj): for name in _dict.get("__slots__", []): del _dict[name] if pickler_is_dill: - pickler._recursive_cells[obj] = [] + pickler._recursive_cells[id(obj)] = [] name = getattr(obj, "__qualname__", obj.__name__) pickler.save_reduce(_create_type, (type(obj), name, obj.__bases__, _dict), obj=obj) if pickler_is_dill: - recursive_cells = pickler._recursive_cells.pop(obj) + recursive_cells = pickler._recursive_cells.pop(id(obj)) for t in recursive_cells: pickler.save_reduce(setattr, (t, 'cell_contents', obj)) # pop None off created by setattr off stack From e6decfe4acb06e7f93c70dd2ecc4e091ca04452a Mon Sep 17 00:00:00 2001 From: anivegesana Date: Sat, 11 Dec 2021 18:08:14 -0500 Subject: [PATCH 03/46] Cells are not allowed to change before 3.7 --- dill/_dill.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/dill/_dill.py b/dill/_dill.py index bfd60bf1..d87b78d8 100644 --- a/dill/_dill.py +++ b/dill/_dill.py @@ -39,6 +39,7 @@ def _trace(boolean): # OLDER: 3.0 <= x < 3.4 *OR* x < 2.7.10 #NOTE: guessing relevant versions OLDER = (PY3 and sys.hexversion < 0x3040000) or (sys.hexversion < 0x2070ab1) OLD33 = (sys.hexversion < 0x3030000) +OLD37 = (sys.hexversion < 0x3070000) PY34 = (0x3040000 <= sys.hexversion < 0x3050000) if PY3: #XXX: get types from .objtypes ? import builtins as __builtin__ @@ -1299,7 +1300,7 @@ def save_wrapper_descriptor(pickler, obj): def save_cell(pickler, obj): log.info("Ce: %s" % obj) f = obj.cell_contents - if is_dill(pickler, child=True): + if not OLD37 and is_dill(pickler, child=True): recursive_cells = pickler._recursive_cells.get(id(f)) if recursive_cells is not None: recursive_cells.append(obj) From 8e1cda7279869937e4ba34c0832d5ddd8e90fb69 Mon Sep 17 00:00:00 2001 From: anivegesana Date: Sat, 11 Dec 2021 18:17:58 -0500 Subject: [PATCH 04/46] Uncomment test cases if Python > 3.7 --- dill/_dill.py | 10 +++++----- tests/test_recursive.py | 15 +++++++++------ 2 files changed, 14 insertions(+), 11 deletions(-) diff --git a/dill/_dill.py b/dill/_dill.py index d87b78d8..e2e2cf9f 100644 --- a/dill/_dill.py +++ b/dill/_dill.py @@ -1511,10 +1511,7 @@ def save_type(pickler, obj): for t in recursive_cells: pickler.save_reduce(setattr, (t, 'cell_contents', obj)) # pop None off created by setattr off stack - if PY3: - pickler.write(bytes('0', 'UTF-8')) - else: - pickler.write('0') + pickler.write(bytes('0', 'UTF-8')) log.info("# %s" % _t) # special cases: NoneType, NotImplementedType, EllipsisType elif obj is type(None): @@ -1590,6 +1587,7 @@ def save_function(pickler, obj): if PY3: #NOTE: workaround for 'super' (see issue #75) _super = ('super' in getattr(obj.__code__,'co_names',())) and (_byref is not None) + if OLD37 and _super: pickler._byref = True if _memo: pickler._recurse = False fkwdefaults = getattr(obj, '__kwdefaults__', None) pickler.save_reduce(_create_function, (obj.__code__, @@ -1598,16 +1596,18 @@ def save_function(pickler, obj): obj.__dict__, fkwdefaults), obj=obj) else: _super = ('super' in getattr(obj.func_code,'co_names',())) and (_byref is not None) and getattr(pickler, '_recurse', False) + if OLD37 and _super: pickler._byref = True if _memo: pickler._recurse = False pickler.save_reduce(_create_function, (obj.func_code, globs, obj.func_name, obj.func_defaults, obj.func_closure, obj.__dict__), obj=obj) + if OLD37 and _super: pickler._byref = _byref if _memo: pickler._recurse = _recurse #clear = (_byref, _super, _recurse, _memo) #print(clear + (OLDER,)) #NOTE: workaround for #234; "partial" still is problematic for recurse - if OLDER and not _byref and (_memo or (not _memo and _recurse)): pickler.clear_memo() + if OLDER and not _byref and (_super or (not _super and _memo) or (not _super and not _memo and _recurse)): pickler.clear_memo() #if _memo: # stack.remove(id(obj)) # #pickler.clear_memo() diff --git a/tests/test_recursive.py b/tests/test_recursive.py index 78e5790b..0c0c1403 100644 --- a/tests/test_recursive.py +++ b/tests/test_recursive.py @@ -7,7 +7,7 @@ import dill from functools import partial -from dill._dill import PY3, OLDER +from dill._dill import PY3, OLDER, OLD37 _super = super class obj1(object): @@ -27,17 +27,20 @@ def __init__(self): def test_super(): assert dill.copy(obj1(), byref=True) assert dill.copy(obj1(), byref=True, recurse=True) - #assert dill.copy(obj1(), recurse=True) #FIXME: fails __main__.py + if not OLD37: + assert dill.copy(obj1(), recurse=True) #FIXME: fails __main__.py assert dill.copy(obj1()) assert dill.copy(obj2(), byref=True) assert dill.copy(obj2(), byref=True, recurse=True) - #assert dill.copy(obj2(), recurse=True) #FIXME: fails __main__.py + if not OLD37: + assert dill.copy(obj2(), recurse=True) #FIXME: fails __main__.py assert dill.copy(obj2()) assert dill.copy(obj3(), byref=True) assert dill.copy(obj3(), byref=True, recurse=True) - #assert dill.copy(obj3(), recurse=True) #FIXME: fails __main__.py + if not OLD37: + assert dill.copy(obj3(), recurse=True) #FIXME: fails __main__.py assert dill.copy(obj3()) @@ -79,8 +82,8 @@ def __init__(self): def test_partials(): assert dill.copy(SubMachine(), byref=True) assert dill.copy(SubMachine(), byref=True, recurse=True) - #if not OLDER: #FIXME: fails __main__.py - # assert dill.copy(SubMachine(), recurse=True) + if not OLD37: #FIXME: fails __main__.py + assert dill.copy(SubMachine(), recurse=True) assert dill.copy(SubMachine()) From 007f2098788be3a0fe827339855368fc937e1f19 Mon Sep 17 00:00:00 2001 From: anivegesana Date: Sat, 11 Dec 2021 18:26:36 -0500 Subject: [PATCH 05/46] Add more complex test case --- tests/test_recursive.py | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/tests/test_recursive.py b/tests/test_recursive.py index 0c0c1403..5488af2b 100644 --- a/tests/test_recursive.py +++ b/tests/test_recursive.py @@ -87,9 +87,25 @@ def test_partials(): assert dill.copy(SubMachine()) +class obj4: + def __init__(self): + super().__init__() + a = self + class obj5: + def __init__(self): + super().__init__() + self.a = a + self.b = obj5() + + +def test_circular_reference(): + if not OLD37: + assert dill.copy(obj4()) + if __name__ == '__main__': #print(('byref','_super','_recurse','_memo','_stop','OLDER')) test_super() test_partial() test_partials() + test_circular_reference() From 2199ff61ca946266aa43c2dcbf8b108588c4c709 Mon Sep 17 00:00:00 2001 From: anivegesana Date: Sat, 11 Dec 2021 23:29:13 -0500 Subject: [PATCH 06/46] Spooky edits at a distance --- dill/_dill.py | 29 +++++++++++++++++++---------- tests/test_recursive.py | 20 +++++++------------- 2 files changed, 26 insertions(+), 23 deletions(-) diff --git a/dill/_dill.py b/dill/_dill.py index e2e2cf9f..26ff9b85 100644 --- a/dill/_dill.py +++ b/dill/_dill.py @@ -39,7 +39,6 @@ def _trace(boolean): # OLDER: 3.0 <= x < 3.4 *OR* x < 2.7.10 #NOTE: guessing relevant versions OLDER = (PY3 and sys.hexversion < 0x3040000) or (sys.hexversion < 0x2070ab1) OLD33 = (sys.hexversion < 0x3030000) -OLD37 = (sys.hexversion < 0x3070000) PY34 = (0x3040000 <= sys.hexversion < 0x3050000) if PY3: #XXX: get types from .objtypes ? import builtins as __builtin__ @@ -881,9 +880,19 @@ def __getattribute__(self, attr): if PY3: def _create_cell(contents): return (lambda y: contents).__closure__[0] + def _create_reference_cell(): + # Hacky trick that allows for the creation of cells that can be spookily + # edited at a distance + contents = None + def updater(value): + nonlocal contents + contents = value + updater(updater) + return (lambda: contents).__closure__[0] else: def _create_cell(contents): return (lambda y: contents).func_closure[0] + # _create_reference_cell not possible in Python 2 def _create_weakref(obj, *args): from weakref import ref @@ -1298,15 +1307,18 @@ def save_wrapper_descriptor(pickler, obj): @register(CellType) def save_cell(pickler, obj): - log.info("Ce: %s" % obj) f = obj.cell_contents - if not OLD37 and is_dill(pickler, child=True): + if is_dill(pickler, child=True): recursive_cells = pickler._recursive_cells.get(id(f)) if recursive_cells is not None: + log.info("Ce2: %s" % obj) + pickler.save_reduce(_create_reference_cell, (), obj=obj) recursive_cells.append(obj) - f = None + log.info("# Ce2") + return + log.info("Ce1: %s" % obj) pickler.save_reduce(_create_cell, (f,), obj=obj) - log.info("# Ce") + log.info("# Ce1") return if not IS_PYPY: @@ -1509,7 +1521,7 @@ def save_type(pickler, obj): if pickler_is_dill: recursive_cells = pickler._recursive_cells.pop(id(obj)) for t in recursive_cells: - pickler.save_reduce(setattr, (t, 'cell_contents', obj)) + pickler.save_reduce(lambda cell, obj_ptr: cell.cell_contents(obj_ptr), (t, obj)) # pop None off created by setattr off stack pickler.write(bytes('0', 'UTF-8')) log.info("# %s" % _t) @@ -1587,7 +1599,6 @@ def save_function(pickler, obj): if PY3: #NOTE: workaround for 'super' (see issue #75) _super = ('super' in getattr(obj.__code__,'co_names',())) and (_byref is not None) - if OLD37 and _super: pickler._byref = True if _memo: pickler._recurse = False fkwdefaults = getattr(obj, '__kwdefaults__', None) pickler.save_reduce(_create_function, (obj.__code__, @@ -1596,18 +1607,16 @@ def save_function(pickler, obj): obj.__dict__, fkwdefaults), obj=obj) else: _super = ('super' in getattr(obj.func_code,'co_names',())) and (_byref is not None) and getattr(pickler, '_recurse', False) - if OLD37 and _super: pickler._byref = True if _memo: pickler._recurse = False pickler.save_reduce(_create_function, (obj.func_code, globs, obj.func_name, obj.func_defaults, obj.func_closure, obj.__dict__), obj=obj) - if OLD37 and _super: pickler._byref = _byref if _memo: pickler._recurse = _recurse #clear = (_byref, _super, _recurse, _memo) #print(clear + (OLDER,)) #NOTE: workaround for #234; "partial" still is problematic for recurse - if OLDER and not _byref and (_super or (not _super and _memo) or (not _super and not _memo and _recurse)): pickler.clear_memo() + if OLDER and not _byref and (_memo or (not _memo and _recurse)): pickler.clear_memo() #if _memo: # stack.remove(id(obj)) # #pickler.clear_memo() diff --git a/tests/test_recursive.py b/tests/test_recursive.py index 5488af2b..cebe967e 100644 --- a/tests/test_recursive.py +++ b/tests/test_recursive.py @@ -7,7 +7,7 @@ import dill from functools import partial -from dill._dill import PY3, OLDER, OLD37 +from dill._dill import PY3, OLDER _super = super class obj1(object): @@ -27,20 +27,17 @@ def __init__(self): def test_super(): assert dill.copy(obj1(), byref=True) assert dill.copy(obj1(), byref=True, recurse=True) - if not OLD37: - assert dill.copy(obj1(), recurse=True) #FIXME: fails __main__.py + assert dill.copy(obj1(), recurse=True) assert dill.copy(obj1()) assert dill.copy(obj2(), byref=True) assert dill.copy(obj2(), byref=True, recurse=True) - if not OLD37: - assert dill.copy(obj2(), recurse=True) #FIXME: fails __main__.py + assert dill.copy(obj2(), recurse=True) assert dill.copy(obj2()) assert dill.copy(obj3(), byref=True) assert dill.copy(obj3(), byref=True, recurse=True) - if not OLD37: - assert dill.copy(obj3(), recurse=True) #FIXME: fails __main__.py + assert dill.copy(obj3(), recurse=True) assert dill.copy(obj3()) @@ -61,8 +58,7 @@ class Model(object): def test_partial(): assert dill.copy(Machine(), byref=True) assert dill.copy(Machine(), byref=True, recurse=True) - if not OLDER: - assert dill.copy(Machine(), recurse=True) + assert dill.copy(Machine(), recurse=True) assert dill.copy(Machine()) @@ -82,8 +78,7 @@ def __init__(self): def test_partials(): assert dill.copy(SubMachine(), byref=True) assert dill.copy(SubMachine(), byref=True, recurse=True) - if not OLD37: #FIXME: fails __main__.py - assert dill.copy(SubMachine(), recurse=True) + assert dill.copy(SubMachine(), recurse=True) assert dill.copy(SubMachine()) @@ -99,8 +94,7 @@ def __init__(self): def test_circular_reference(): - if not OLD37: - assert dill.copy(obj4()) + assert dill.copy(obj4()) if __name__ == '__main__': From 2c5d2fa63d99d1cdedd620330b8d2e11942e2e5a Mon Sep 17 00:00:00 2001 From: anivegesana Date: Sun, 12 Dec 2021 00:06:49 -0500 Subject: [PATCH 07/46] Wrap in exec to prevent syntax errors in Python 2 --- dill/_dill.py | 30 +++++++++++++++++++----------- 1 file changed, 19 insertions(+), 11 deletions(-) diff --git a/dill/_dill.py b/dill/_dill.py index 26ff9b85..6cf09eaf 100644 --- a/dill/_dill.py +++ b/dill/_dill.py @@ -880,20 +880,26 @@ def __getattribute__(self, attr): if PY3: def _create_cell(contents): return (lambda y: contents).__closure__[0] - def _create_reference_cell(): - # Hacky trick that allows for the creation of cells that can be spookily - # edited at a distance - contents = None - def updater(value): - nonlocal contents - contents = value - updater(updater) - return (lambda: contents).__closure__[0] + # Hacky trick that allows for the creation of cells that can be spookily + # edited at a distance and a very hacky way to avoid a syntax error in + # Python 2 + exec(''' +def _create_reference_cell(): + contents = None + def updater(value): + nonlocal contents + contents = value + updater(updater) + return (lambda: contents).__closure__[0] +''') else: def _create_cell(contents): return (lambda y: contents).func_closure[0] # _create_reference_cell not possible in Python 2 +def _update_cell(cell, obj_ptr): + return cell.cell_contents(obj_ptr) + def _create_weakref(obj, *args): from weakref import ref if obj is None: # it's dead @@ -1521,7 +1527,7 @@ def save_type(pickler, obj): if pickler_is_dill: recursive_cells = pickler._recursive_cells.pop(id(obj)) for t in recursive_cells: - pickler.save_reduce(lambda cell, obj_ptr: cell.cell_contents(obj_ptr), (t, obj)) + pickler.save_reduce(_update_cell, (t, obj)) # pop None off created by setattr off stack pickler.write(bytes('0', 'UTF-8')) log.info("# %s" % _t) @@ -1607,16 +1613,18 @@ def save_function(pickler, obj): obj.__dict__, fkwdefaults), obj=obj) else: _super = ('super' in getattr(obj.func_code,'co_names',())) and (_byref is not None) and getattr(pickler, '_recurse', False) + if _super: pickler._byref = True if _memo: pickler._recurse = False pickler.save_reduce(_create_function, (obj.func_code, globs, obj.func_name, obj.func_defaults, obj.func_closure, obj.__dict__), obj=obj) + if _super: pickler._byref = _byref if _memo: pickler._recurse = _recurse #clear = (_byref, _super, _recurse, _memo) #print(clear + (OLDER,)) #NOTE: workaround for #234; "partial" still is problematic for recurse - if OLDER and not _byref and (_memo or (not _memo and _recurse)): pickler.clear_memo() + if OLDER and not _byref and (_super or (not _super and _memo) or (not _super and not _memo and _recurse)): pickler.clear_memo() #if _memo: # stack.remove(id(obj)) # #pickler.clear_memo() From 4ae150c2e5cb3ab5588982d3b85821c31fd75c5b Mon Sep 17 00:00:00 2001 From: anivegesana Date: Sun, 12 Dec 2021 00:11:14 -0500 Subject: [PATCH 08/46] Fix testcase --- tests/test_recursive.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test_recursive.py b/tests/test_recursive.py index cebe967e..a66328ff 100644 --- a/tests/test_recursive.py +++ b/tests/test_recursive.py @@ -84,11 +84,11 @@ def test_partials(): class obj4: def __init__(self): - super().__init__() + super(obj4, self).__init__() a = self class obj5: def __init__(self): - super().__init__() + super(obj5, self).__init__() self.a = a self.b = obj5() From e33335efd30d8af4b9e3ce657ad033871a9b3c3f Mon Sep 17 00:00:00 2001 From: anivegesana Date: Sun, 12 Dec 2021 00:40:29 -0500 Subject: [PATCH 09/46] Test impossible in Py2 --- dill/_dill.py | 6 +++--- tests/test_recursive.py | 7 ++++--- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/dill/_dill.py b/dill/_dill.py index 6cf09eaf..ad965ff4 100644 --- a/dill/_dill.py +++ b/dill/_dill.py @@ -1498,7 +1498,7 @@ def save_type(pickler, obj): pickler_is_dill = is_dill(pickler, child=True) if issubclass(type(obj), type): # try: # used when pickling the class as code (or the interpreter) - if pickler_is_dill and not pickler._byref and id(obj) not in pickler._recursive_cells: + if pickler_is_dill and not pickler._byref and (not PY3 or id(obj) not in pickler._recursive_cells): # thanks to Tom Stepleton pointing out pickler._session unneeded _t = 'T2' log.info("%s: %s" % (_t, obj)) @@ -1519,12 +1519,12 @@ def save_type(pickler, obj): #print ("%s\n%s" % (obj.__bases__, obj.__dict__)) for name in _dict.get("__slots__", []): del _dict[name] - if pickler_is_dill: + if PY3 and pickler_is_dill: pickler._recursive_cells[id(obj)] = [] name = getattr(obj, "__qualname__", obj.__name__) pickler.save_reduce(_create_type, (type(obj), name, obj.__bases__, _dict), obj=obj) - if pickler_is_dill: + if PY3 and pickler_is_dill: recursive_cells = pickler._recursive_cells.pop(id(obj)) for t in recursive_cells: pickler.save_reduce(_update_cell, (t, obj)) diff --git a/tests/test_recursive.py b/tests/test_recursive.py index a66328ff..d5ed2c31 100644 --- a/tests/test_recursive.py +++ b/tests/test_recursive.py @@ -82,11 +82,11 @@ def test_partials(): assert dill.copy(SubMachine()) -class obj4: +class obj4(object): def __init__(self): super(obj4, self).__init__() a = self - class obj5: + class obj5(object): def __init__(self): super(obj5, self).__init__() self.a = a @@ -94,7 +94,8 @@ def __init__(self): def test_circular_reference(): - assert dill.copy(obj4()) + if PY3: + assert dill.copy(obj4()) if __name__ == '__main__': From bb16131d4309803be977acfcecf5a7d0daca1a11 Mon Sep 17 00:00:00 2001 From: anivegesana Date: Sun, 12 Dec 2021 01:30:50 -0500 Subject: [PATCH 10/46] Small correction --- dill/_dill.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dill/_dill.py b/dill/_dill.py index ad965ff4..9aa2e53a 100644 --- a/dill/_dill.py +++ b/dill/_dill.py @@ -1498,7 +1498,7 @@ def save_type(pickler, obj): pickler_is_dill = is_dill(pickler, child=True) if issubclass(type(obj), type): # try: # used when pickling the class as code (or the interpreter) - if pickler_is_dill and not pickler._byref and (not PY3 or id(obj) not in pickler._recursive_cells): + if PY3 and pickler_is_dill and not pickler._byref and id(obj) not in pickler._recursive_cells: # thanks to Tom Stepleton pointing out pickler._session unneeded _t = 'T2' log.info("%s: %s" % (_t, obj)) From 56fcc30907879198ca777c473695ba5d55cee86e Mon Sep 17 00:00:00 2001 From: anivegesana Date: Sun, 12 Dec 2021 01:42:24 -0500 Subject: [PATCH 11/46] Correct the correction --- dill/_dill.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dill/_dill.py b/dill/_dill.py index 9aa2e53a..566945c5 100644 --- a/dill/_dill.py +++ b/dill/_dill.py @@ -1498,7 +1498,7 @@ def save_type(pickler, obj): pickler_is_dill = is_dill(pickler, child=True) if issubclass(type(obj), type): # try: # used when pickling the class as code (or the interpreter) - if PY3 and pickler_is_dill and not pickler._byref and id(obj) not in pickler._recursive_cells: + if pickler_is_dill and not pickler._byref and id(obj) not in pickler._recursive_cells: # thanks to Tom Stepleton pointing out pickler._session unneeded _t = 'T2' log.info("%s: %s" % (_t, obj)) From 42a93dcabdadb7d30e64782e515132cf52d8d2bc Mon Sep 17 00:00:00 2001 From: anivegesana Date: Tue, 14 Dec 2021 02:27:55 -0500 Subject: [PATCH 12/46] Add Python 2 support --- dill/_dill.py | 29 +++++++++++++++++------------ 1 file changed, 17 insertions(+), 12 deletions(-) diff --git a/dill/_dill.py b/dill/_dill.py index 566945c5..a44cbe28 100644 --- a/dill/_dill.py +++ b/dill/_dill.py @@ -880,21 +880,26 @@ def __getattribute__(self, attr): if PY3: def _create_cell(contents): return (lambda y: contents).__closure__[0] - # Hacky trick that allows for the creation of cells that can be spookily - # edited at a distance and a very hacky way to avoid a syntax error in - # Python 2 - exec(''' -def _create_reference_cell(): - contents = None - def updater(value): - nonlocal contents - contents = value - updater(updater) - return (lambda: contents).__closure__[0] -''') + def _create_reference_cell(): + contents = None + v = vars() + class Updater(object): + def __call__(self, value): + v['contents'] = value + contents = Updater() + return (lambda: contents).__closure__[0] else: def _create_cell(contents): return (lambda y: contents).func_closure[0] + def _create_reference_cell(): + contents = None + v = vars() + class Updater(object): + def __call__(self, value): + v['contents'] = value + contents = Updater() + return (lambda: contents).func_closure[0] + # _create_reference_cell not possible in Python 2 def _update_cell(cell, obj_ptr): From c1566c5883c50c7652a6df8db964b8fa87d1a43f Mon Sep 17 00:00:00 2001 From: anivegesana Date: Tue, 14 Dec 2021 02:33:12 -0500 Subject: [PATCH 13/46] Turn feature on for Python 2 --- dill/_dill.py | 9 ++++++--- tests/test_recursive.py | 3 +-- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/dill/_dill.py b/dill/_dill.py index a44cbe28..ea8ec3d6 100644 --- a/dill/_dill.py +++ b/dill/_dill.py @@ -1524,17 +1524,20 @@ def save_type(pickler, obj): #print ("%s\n%s" % (obj.__bases__, obj.__dict__)) for name in _dict.get("__slots__", []): del _dict[name] - if PY3 and pickler_is_dill: + if pickler_is_dill: pickler._recursive_cells[id(obj)] = [] name = getattr(obj, "__qualname__", obj.__name__) pickler.save_reduce(_create_type, (type(obj), name, obj.__bases__, _dict), obj=obj) - if PY3 and pickler_is_dill: + if pickler_is_dill: recursive_cells = pickler._recursive_cells.pop(id(obj)) for t in recursive_cells: pickler.save_reduce(_update_cell, (t, obj)) # pop None off created by setattr off stack - pickler.write(bytes('0', 'UTF-8')) + if PY3: + pickler.write(bytes('0', 'UTF-8')) + else: + pickler.write('0') log.info("# %s" % _t) # special cases: NoneType, NotImplementedType, EllipsisType elif obj is type(None): diff --git a/tests/test_recursive.py b/tests/test_recursive.py index d5ed2c31..fb3650e0 100644 --- a/tests/test_recursive.py +++ b/tests/test_recursive.py @@ -94,8 +94,7 @@ def __init__(self): def test_circular_reference(): - if PY3: - assert dill.copy(obj4()) + assert dill.copy(obj4()) if __name__ == '__main__': From 9b56c9718066ed2d5071db43f7dd3d3874107601 Mon Sep 17 00:00:00 2001 From: anivegesana Date: Tue, 14 Dec 2021 02:40:49 -0500 Subject: [PATCH 14/46] Prefer function over object when possible --- dill/_dill.py | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/dill/_dill.py b/dill/_dill.py index ea8ec3d6..c613c2a1 100644 --- a/dill/_dill.py +++ b/dill/_dill.py @@ -880,14 +880,15 @@ def __getattribute__(self, attr): if PY3: def _create_cell(contents): return (lambda y: contents).__closure__[0] - def _create_reference_cell(): - contents = None - v = vars() - class Updater(object): - def __call__(self, value): - v['contents'] = value - contents = Updater() - return (lambda: contents).__closure__[0] + exec(''' +def _create_reference_cell(): + contents = None + def updater(value): + nonlocal contents + contents = value + updater(updater) + return (lambda: contents).__closure__[0] +''') else: def _create_cell(contents): return (lambda y: contents).func_closure[0] From 419302a938837250d014fe065965dd3acf187fd0 Mon Sep 17 00:00:00 2001 From: anivegesana Date: Tue, 14 Dec 2021 14:46:48 -0500 Subject: [PATCH 15/46] Add changes from review --- dill/_dill.py | 4 +--- tests/test_recursive.py | 8 ++------ 2 files changed, 3 insertions(+), 9 deletions(-) diff --git a/dill/_dill.py b/dill/_dill.py index c613c2a1..b864367b 100644 --- a/dill/_dill.py +++ b/dill/_dill.py @@ -901,8 +901,6 @@ def __call__(self, value): contents = Updater() return (lambda: contents).func_closure[0] - # _create_reference_cell not possible in Python 2 - def _update_cell(cell, obj_ptr): return cell.cell_contents(obj_ptr) @@ -1534,7 +1532,7 @@ def save_type(pickler, obj): recursive_cells = pickler._recursive_cells.pop(id(obj)) for t in recursive_cells: pickler.save_reduce(_update_cell, (t, obj)) - # pop None off created by setattr off stack + # pop None off created by _update_cell off stack if PY3: pickler.write(bytes('0', 'UTF-8')) else: diff --git a/tests/test_recursive.py b/tests/test_recursive.py index fb3650e0..bc7e58c9 100644 --- a/tests/test_recursive.py +++ b/tests/test_recursive.py @@ -7,8 +7,6 @@ import dill from functools import partial -from dill._dill import PY3, OLDER -_super = super class obj1(object): def __init__(self): @@ -16,7 +14,7 @@ def __init__(self): class obj2(object): def __init__(self): - _super(obj2, self).__init__() + super(obj2, self).__init__() class obj3(object): super_ = super @@ -71,8 +69,7 @@ def member(self, model): class SubMachine(Machine2): def __init__(self): - _super(SubMachine, self).__init__() - #super(SubMachine, self).__init__() #XXX: works, except for 3.1-3.3 + super(SubMachine, self).__init__() def test_partials(): @@ -98,7 +95,6 @@ def test_circular_reference(): if __name__ == '__main__': - #print(('byref','_super','_recurse','_memo','_stop','OLDER')) test_super() test_partial() test_partials() From 36aa9adb1e44a58d0cf5ebeaf34862cc9f126710 Mon Sep 17 00:00:00 2001 From: anivegesana Date: Tue, 14 Dec 2021 17:10:40 -0500 Subject: [PATCH 16/46] Turn off test_circular_reference for Python 2 --- dill/_dill.py | 2 +- tests/test_recursive.py | 4 +++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/dill/_dill.py b/dill/_dill.py index b864367b..d7f1f7c5 100644 --- a/dill/_dill.py +++ b/dill/_dill.py @@ -1318,7 +1318,7 @@ def save_wrapper_descriptor(pickler, obj): @register(CellType) def save_cell(pickler, obj): f = obj.cell_contents - if is_dill(pickler, child=True): + if PY3 and is_dill(pickler, child=True): recursive_cells = pickler._recursive_cells.get(id(f)) if recursive_cells is not None: log.info("Ce2: %s" % obj) diff --git a/tests/test_recursive.py b/tests/test_recursive.py index bc7e58c9..e4463dc8 100644 --- a/tests/test_recursive.py +++ b/tests/test_recursive.py @@ -6,6 +6,7 @@ # - https://github.com/uqfoundation/dill/blob/master/LICENSE import dill +from dill._dill import PY3 from functools import partial class obj1(object): @@ -91,7 +92,8 @@ def __init__(self): def test_circular_reference(): - assert dill.copy(obj4()) + if PY3: + assert dill.copy(obj4()) if __name__ == '__main__': From eb3282487f5cf883d90119ac67aeaf9f38cc05ba Mon Sep 17 00:00:00 2001 From: anivegesana Date: Tue, 14 Dec 2021 20:49:19 -0500 Subject: [PATCH 17/46] Reformat and support empty cells --- dill/_dill.py | 91 ++++++++++++++++++++++++++++------------- tests/test_functions.py | 12 ++++++ tests/test_recursive.py | 3 ++ 3 files changed, 77 insertions(+), 29 deletions(-) diff --git a/dill/_dill.py b/dill/_dill.py index d7f1f7c5..de7f2612 100644 --- a/dill/_dill.py +++ b/dill/_dill.py @@ -39,6 +39,7 @@ def _trace(boolean): # OLDER: 3.0 <= x < 3.4 *OR* x < 2.7.10 #NOTE: guessing relevant versions OLDER = (PY3 and sys.hexversion < 0x3040000) or (sys.hexversion < 0x2070ab1) OLD33 = (sys.hexversion < 0x3030000) +OLD37 = (sys.hexversion < 0x3070000) PY34 = (0x3040000 <= sys.hexversion < 0x3050000) if PY3: #XXX: get types from .objtypes ? import builtins as __builtin__ @@ -877,32 +878,52 @@ def __getattribute__(self, attr): attrs[index] = ".".join([attrs[index], attr]) return type(self)(attrs, index) -if PY3: - def _create_cell(contents): - return (lambda y: contents).__closure__[0] - exec(''' -def _create_reference_cell(): - contents = None - def updater(value): - nonlocal contents - contents = value - updater(updater) - return (lambda: contents).__closure__[0] -''') -else: - def _create_cell(contents): - return (lambda y: contents).func_closure[0] - def _create_reference_cell(): + +if OLD37 and PY3: + # Python 3.0 to 3.6 is in a weird case, where it is possible to pickle + # recursive cells, we can't assign directly to the cell. + + # A sentinel object to signal that the cell that is going to be created + # is either a reference to a value that isn't created yet and will be + # updated if passed into _create_cell or is a cell that is genuinely + # empty if passed into updater. + __CELL_EMPTY = object() + eval('''def _create_cell(contents=__CELL_EMPTY): + if contents is __CELL_EMPTY: contents = None - v = vars() - class Updater(object): - def __call__(self, value): - v['contents'] = value - contents = Updater() - return (lambda: contents).func_closure[0] + def updater(value): + nonlocal contents + if value is __CELL_EMPTY: + del contents + else: + contents = value + contents = updater + return (lambda: contents).__closure__[0]''') + + def _setattr(object, name, value): + if type(object) is CellType and name == 'cell_contents': + object.cell_contents(value) + else: + setattr(object, name, value) + + def _delattr(object, name): + if type(object) is CellType and name == 'cell_contents': + return object.cell_contents(__CELL_EMPTY) + else: + delattr(object, name) +else: + if PY3: + def _create_cell(contents=None): + return (lambda: contents).__closure__[0] + else: + def _create_cell(contents=None): + return (lambda: contents).func_closure[0] + + def _setattr(object, name, value): + return setattr(object, name, value) + def _delattr(object, name): + return delattr(object, name) -def _update_cell(cell, obj_ptr): - return cell.cell_contents(obj_ptr) def _create_weakref(obj, *args): from weakref import ref @@ -1317,12 +1338,24 @@ def save_wrapper_descriptor(pickler, obj): @register(CellType) def save_cell(pickler, obj): - f = obj.cell_contents + try: + f = obj.cell_contents + except: + log.info("Ce3: %s" % obj) + pickler.save_reduce(_create_cell, (), obj=obj) + pickler.save_reduce(_delattr, (obj, 'cell_contents')) + # pop None off created by _setattr off stack + if PY3: + pickler.write(bytes('0', 'UTF-8')) + else: + pickler.write('0') # pragma: no cover + log.info("# Ce3") + return if PY3 and is_dill(pickler, child=True): recursive_cells = pickler._recursive_cells.get(id(f)) if recursive_cells is not None: log.info("Ce2: %s" % obj) - pickler.save_reduce(_create_reference_cell, (), obj=obj) + pickler.save_reduce(_create_cell, (), obj=obj) recursive_cells.append(obj) log.info("# Ce2") return @@ -1531,12 +1564,12 @@ def save_type(pickler, obj): if pickler_is_dill: recursive_cells = pickler._recursive_cells.pop(id(obj)) for t in recursive_cells: - pickler.save_reduce(_update_cell, (t, obj)) - # pop None off created by _update_cell off stack + pickler.save_reduce(_setattr, (t, 'cell_contents', obj)) + # pop None off created by _setattr off stack if PY3: pickler.write(bytes('0', 'UTF-8')) else: - pickler.write('0') + pickler.write('0') # pragma: no cover log.info("# %s" % _t) # special cases: NoneType, NotImplementedType, EllipsisType elif obj is type(None): diff --git a/tests/test_functions.py b/tests/test_functions.py index 23de5f09..367e0646 100644 --- a/tests/test_functions.py +++ b/tests/test_functions.py @@ -36,6 +36,14 @@ def function_e(e, *e1, e2=1, e3=2): return e + sum(e1) + e2 + e3''') +# https://stackoverflow.com/a/45661180 +if is_py3(): + exec('''def make_empty_cell(): + if False: + del value + return (lambda: value).__closure__[0]''') + + def test_functions(): dumped_func_a = dill.dumps(function_a) assert dill.loads(dumped_func_a)(0) == 0 @@ -53,6 +61,10 @@ def test_functions(): assert dill.loads(dumped_func_d)(1, 2, d2=3) == 6 if is_py3(): + empty_cell = make_empty_cell() + cell_copy = dill.loads(dill.dumps(empty_cell)) + assert 'empty' in str(cell_copy) + exec(''' dumped_func_e = dill.dumps(function_e) assert dill.loads(dumped_func_e)(1, 2) == 6 diff --git a/tests/test_recursive.py b/tests/test_recursive.py index e4463dc8..454a5674 100644 --- a/tests/test_recursive.py +++ b/tests/test_recursive.py @@ -94,6 +94,9 @@ def __init__(self): def test_circular_reference(): if PY3: assert dill.copy(obj4()) + obj4_copy = dill.loads(dill.dumps(obj4())) + assert type(obj4_copy) is type(obj4_copy).__init__.__closure__[0].cell_contents + assert type(obj4_copy.b) is type(obj4_copy.b).__init__.__closure__[0].cell_contents if __name__ == '__main__': From 64187c930f4088963f23aceee4a122d22f542f13 Mon Sep 17 00:00:00 2001 From: anivegesana Date: Wed, 15 Dec 2021 00:08:50 -0500 Subject: [PATCH 18/46] Solve some more versioning issues --- dill/_dill.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/dill/_dill.py b/dill/_dill.py index de7f2612..687a20e0 100644 --- a/dill/_dill.py +++ b/dill/_dill.py @@ -888,17 +888,18 @@ def __getattribute__(self, attr): # updated if passed into _create_cell or is a cell that is genuinely # empty if passed into updater. __CELL_EMPTY = object() - eval('''def _create_cell(contents=__CELL_EMPTY): + __nonlocal = ('nonlocal',) if PY3 else ('x = ',) + exec('''def _create_cell(contents=__CELL_EMPTY): if contents is __CELL_EMPTY: contents = None def updater(value): - nonlocal contents + %s contents if value is __CELL_EMPTY: del contents else: contents = value contents = updater - return (lambda: contents).__closure__[0]''') + return (lambda: contents).__closure__[0]''' % __nonlocal) def _setattr(object, name, value): if type(object) is CellType and name == 'cell_contents': From 436e4993907f4cd9e9b1cd8a5488de0b1fdf289a Mon Sep 17 00:00:00 2001 From: anivegesana Date: Wed, 15 Dec 2021 18:50:25 -0500 Subject: [PATCH 19/46] Add shim that chooses the correct function at unpickling --- dill/_dill.py | 31 ++++++++++++++++++++++++------- 1 file changed, 24 insertions(+), 7 deletions(-) diff --git a/dill/_dill.py b/dill/_dill.py index 687a20e0..e674677b 100644 --- a/dill/_dill.py +++ b/dill/_dill.py @@ -448,6 +448,25 @@ def load_session(filename='/tmp/session.pkl', main=None, **kwds): ### End: Pickle the Interpreter +class BuiltinShim: + ''' + Refers to a shim function in dill._dill if it exists and to a builtin + function if it doesn't exist. This choice is made during the unpickle + step instead of the pickling process. + ''' + def __init__(self, shim_name, builtin): + self.shim_name = shim_name + self.builtin = builtin + self.__call__ = getattr(globals(), shim_name, builtin) + def __copy__(self): + return self + def __deepcopy__(self, memo): + return self + def __call__(self, *args, **kwargs): + return getattr(globals(), shim_name, builtin)(*args, **kwargs) + def __reduce__(self): + return (getattr, (sys.modules[__name__], self.shim_name, self.builtin)) + class MetaCatchingDict(dict): def get(self, key, default=None): try: @@ -920,10 +939,8 @@ def _create_cell(contents=None): def _create_cell(contents=None): return (lambda: contents).func_closure[0] - def _setattr(object, name, value): - return setattr(object, name, value) - def _delattr(object, name): - return delattr(object, name) +_setattr_shim = BuiltinShim('_setattr', setattr) +_delattr_shim = BuiltinShim('_delattr', delattr) def _create_weakref(obj, *args): @@ -1344,8 +1361,8 @@ def save_cell(pickler, obj): except: log.info("Ce3: %s" % obj) pickler.save_reduce(_create_cell, (), obj=obj) - pickler.save_reduce(_delattr, (obj, 'cell_contents')) - # pop None off created by _setattr off stack + pickler.save_reduce(_delattr_shim, (obj, 'cell_contents')) + # pop None off created by _delattr off stack if PY3: pickler.write(bytes('0', 'UTF-8')) else: @@ -1565,7 +1582,7 @@ def save_type(pickler, obj): if pickler_is_dill: recursive_cells = pickler._recursive_cells.pop(id(obj)) for t in recursive_cells: - pickler.save_reduce(_setattr, (t, 'cell_contents', obj)) + pickler.save_reduce(_setattr_shim, (t, 'cell_contents', obj)) # pop None off created by _setattr off stack if PY3: pickler.write(bytes('0', 'UTF-8')) From 2c304367efabbe2a553e5c01b7d1236175a7e395 Mon Sep 17 00:00:00 2001 From: anivegesana Date: Wed, 15 Dec 2021 19:01:48 -0500 Subject: [PATCH 20/46] Avoid pickling dill._dill --- dill/_dill.py | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/dill/_dill.py b/dill/_dill.py index e674677b..3a8a82c2 100644 --- a/dill/_dill.py +++ b/dill/_dill.py @@ -454,6 +454,16 @@ class BuiltinShim: function if it doesn't exist. This choice is made during the unpickle step instead of the pickling process. ''' + class DillRef: + def __copy__(self): + return self + def __deepcopy__(self, memo): + return self + def __call__(self): + pass + def __reduce__(self): + return (__import__, ('dill._dill',)) + dill_module = DillRef() def __init__(self, shim_name, builtin): self.shim_name = shim_name self.builtin = builtin @@ -465,7 +475,7 @@ def __deepcopy__(self, memo): def __call__(self, *args, **kwargs): return getattr(globals(), shim_name, builtin)(*args, **kwargs) def __reduce__(self): - return (getattr, (sys.modules[__name__], self.shim_name, self.builtin)) + return (getattr, (self.dill_module, self.shim_name, self.builtin)) class MetaCatchingDict(dict): def get(self, key, default=None): From 093d9cabc519d1170060602e8d26045a95649012 Mon Sep 17 00:00:00 2001 From: anivegesana Date: Wed, 15 Dec 2021 19:34:26 -0500 Subject: [PATCH 21/46] Small correction --- dill/_dill.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dill/_dill.py b/dill/_dill.py index 3a8a82c2..4e6bba7e 100644 --- a/dill/_dill.py +++ b/dill/_dill.py @@ -462,7 +462,7 @@ def __deepcopy__(self, memo): def __call__(self): pass def __reduce__(self): - return (__import__, ('dill._dill',)) + return (_import_module, ('dill._dill',)) dill_module = DillRef() def __init__(self, shim_name, builtin): self.shim_name = shim_name From 8ea0e26ef55874aa353a24b5b866973f71f7c9ed Mon Sep 17 00:00:00 2001 From: anivegesana Date: Wed, 15 Dec 2021 19:45:32 -0500 Subject: [PATCH 22/46] This is why dill._dill would try to pickle --- dill/_dill.py | 24 +++++++----------------- 1 file changed, 7 insertions(+), 17 deletions(-) diff --git a/dill/_dill.py b/dill/_dill.py index 4e6bba7e..d73b2b4e 100644 --- a/dill/_dill.py +++ b/dill/_dill.py @@ -454,16 +454,6 @@ class BuiltinShim: function if it doesn't exist. This choice is made during the unpickle step instead of the pickling process. ''' - class DillRef: - def __copy__(self): - return self - def __deepcopy__(self, memo): - return self - def __call__(self): - pass - def __reduce__(self): - return (_import_module, ('dill._dill',)) - dill_module = DillRef() def __init__(self, shim_name, builtin): self.shim_name = shim_name self.builtin = builtin @@ -475,7 +465,7 @@ def __deepcopy__(self, memo): def __call__(self, *args, **kwargs): return getattr(globals(), shim_name, builtin)(*args, **kwargs) def __reduce__(self): - return (getattr, (self.dill_module, self.shim_name, self.builtin)) + return (getattr, (sys.modules[__name__], self.shim_name, self.builtin)) class MetaCatchingDict(dict): def get(self, key, default=None): @@ -1502,22 +1492,22 @@ def save_weakproxy(pickler, obj): @register(ModuleType) def save_module(pickler, obj): if False: #_use_diff: - if obj.__name__ != "dill": + if obj.__name__.split('.', 1)[0] != "dill": try: changed = diff.whats_changed(obj, seen=pickler._diff_cache)[0] except RuntimeError: # not memorised module, probably part of dill pass else: - log.info("M1: %s with diff" % obj) + log.info("M2: %s with diff" % obj) log.info("Diff: %s", changed.keys()) pickler.save_reduce(_import_module, (obj.__name__,), obj=obj, state=changed) - log.info("# M1") + log.info("# M2") return - log.info("M2: %s" % obj) + log.info("M1: %s" % obj) pickler.save_reduce(_import_module, (obj.__name__,), obj=obj) - log.info("# M2") + log.info("# M1") else: # if a module file name starts with prefix, it should be a builtin # module, so should be pickled as a reference @@ -1530,7 +1520,7 @@ def save_module(pickler, obj): 'site-packages' in obj.__file__) else: builtin_mod = True - if obj.__name__ not in ("builtins", "dill") \ + if obj.__name__ not in ("builtins", "dill", "dill._dill") \ and not builtin_mod or is_dill(pickler, child=True) and obj is pickler._main: log.info("M1: %s" % obj) _main_dict = obj.__dict__.copy() #XXX: better no copy? option to copy? From 15908d332396c4fff3e0d71ccbd6e16210cf9a1a Mon Sep 17 00:00:00 2001 From: anivegesana Date: Thu, 16 Dec 2021 01:52:27 -0500 Subject: [PATCH 23/46] Add shim for reference cells --- dill/_dill.py | 49 +++++++++++++++++++++++++++++++++---------------- 1 file changed, 33 insertions(+), 16 deletions(-) diff --git a/dill/_dill.py b/dill/_dill.py index d73b2b4e..4d0c04dc 100644 --- a/dill/_dill.py +++ b/dill/_dill.py @@ -448,12 +448,23 @@ def load_session(filename='/tmp/session.pkl', main=None, **kwds): ### End: Pickle the Interpreter +class sentinel: + """ + Create a unique sentinel object that is pickled as a constant. + """ + def __init__(self, name): + self.name = name + def __repr__(self): + return __name__ + '.' + self.name + def __reduce__(self): + return repr(self) + class BuiltinShim: - ''' + """ Refers to a shim function in dill._dill if it exists and to a builtin function if it doesn't exist. This choice is made during the unpickle step instead of the pickling process. - ''' + """ def __init__(self, shim_name, builtin): self.shim_name = shim_name self.builtin = builtin @@ -898,22 +909,24 @@ def __getattribute__(self, attr): return type(self)(attrs, index) +# A sentinel object to signal that the cell that is going to be created +# is either a reference to a value that isn't created yet and will be +# updated if passed into _create_cell or is a cell that is genuinely +# empty if passed into updater. +# Can be safely replaced with None once breaking changes are allowed. +_CELL_REF = sentinel('_CELL_REF') +_CELL_REF_shim = BuiltinShim('_CELL_REF', None) + if OLD37 and PY3: # Python 3.0 to 3.6 is in a weird case, where it is possible to pickle # recursive cells, we can't assign directly to the cell. - - # A sentinel object to signal that the cell that is going to be created - # is either a reference to a value that isn't created yet and will be - # updated if passed into _create_cell or is a cell that is genuinely - # empty if passed into updater. - __CELL_EMPTY = object() __nonlocal = ('nonlocal',) if PY3 else ('x = ',) - exec('''def _create_cell(contents=__CELL_EMPTY): - if contents is __CELL_EMPTY: + exec('''def _create_cell(contents=_CELL_REF): + if contents is _CELL_REF: contents = None def updater(value): %s contents - if value is __CELL_EMPTY: + if value is _CELL_REF: del contents else: contents = value @@ -928,15 +941,15 @@ def _setattr(object, name, value): def _delattr(object, name): if type(object) is CellType and name == 'cell_contents': - return object.cell_contents(__CELL_EMPTY) + return object.cell_contents(_CELL_REF) else: delattr(object, name) else: if PY3: - def _create_cell(contents=None): + def _create_cell(contents=_CELL_REF): return (lambda: contents).__closure__[0] else: - def _create_cell(contents=None): + def _create_cell(contents=_CELL_REF): return (lambda: contents).func_closure[0] _setattr_shim = BuiltinShim('_setattr', setattr) @@ -1360,7 +1373,7 @@ def save_cell(pickler, obj): f = obj.cell_contents except: log.info("Ce3: %s" % obj) - pickler.save_reduce(_create_cell, (), obj=obj) + pickler.save_reduce(_create_cell, (_CELL_REF_shim,), obj=obj) pickler.save_reduce(_delattr_shim, (obj, 'cell_contents')) # pop None off created by _delattr off stack if PY3: @@ -1373,7 +1386,7 @@ def save_cell(pickler, obj): recursive_cells = pickler._recursive_cells.get(id(f)) if recursive_cells is not None: log.info("Ce2: %s" % obj) - pickler.save_reduce(_create_cell, (), obj=obj) + pickler.save_reduce(_create_cell, (_CELL_REF_shim,), obj=obj) recursive_cells.append(obj) log.info("# Ce2") return @@ -1529,6 +1542,10 @@ def save_module(pickler, obj): pickler.save_reduce(_import_module, (obj.__name__,), obj=obj, state=_main_dict) log.info("# M1") + elif obj.__name__ == "dill._dill": + log.info("M2: %s" % obj) + pickler.save_global(obj) + log.info("# M2") else: log.info("M2: %s" % obj) pickler.save_reduce(_import_module, (obj.__name__,), obj=obj) From 14bde51693fa2ef77aaaf5ce8a6abd8f24fd175f Mon Sep 17 00:00:00 2001 From: anivegesana Date: Thu, 16 Dec 2021 02:02:54 -0500 Subject: [PATCH 24/46] Copy functions for sentinel --- dill/_dill.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/dill/_dill.py b/dill/_dill.py index 4d0c04dc..bd0ca6f5 100644 --- a/dill/_dill.py +++ b/dill/_dill.py @@ -456,6 +456,10 @@ def __init__(self, name): self.name = name def __repr__(self): return __name__ + '.' + self.name + def __copy__(self): + return self + def __deepcopy__(self, memo): + return self def __reduce__(self): return repr(self) From 9eafe5554c354e9e70f2a16aef1d558e9a252d63 Mon Sep 17 00:00:00 2001 From: anivegesana Date: Thu, 23 Dec 2021 17:50:38 -0500 Subject: [PATCH 25/46] Recursive function cells --- dill/_dill.py | 80 +++++++++++++++++++++++------------------ tests/test_recursive.py | 11 ++++++ 2 files changed, 56 insertions(+), 35 deletions(-) diff --git a/dill/_dill.py b/dill/_dill.py index bd0ca6f5..55ad0f73 100644 --- a/dill/_dill.py +++ b/dill/_dill.py @@ -29,8 +29,6 @@ def _trace(boolean): else: log.setLevel(logging.WARN) return -stack = dict() # record of 'recursion-sensitive' pickled objects - import os import sys diff = None @@ -496,6 +494,33 @@ def __missing__(self, key): raise KeyError() +def _enter_recursive_cell_stack(pickler, obj, is_pickler_dill=None): + if is_pickler_dill is None: + is_pickler_dill = is_dill(pickler, child=True) + if is_pickler_dill: + # assert id(obj) not in pickler._recursive_cells, str(obj) + ' already pushed on stack!' + # if not hasattr(pickler, 'x'): pickler.x = 0 + # print(pickler.x*' ', 'push', obj, id(obj), pickler._recurse) + # pickler.x += 1 + l = [] + pickler._recursive_cells[id(obj)] = (len(pickler._recursive_cells), l) + return l +def _exit_recursive_cell_stack(pickler, obj, is_pickler_dill=None): + if is_pickler_dill is None: + is_pickler_dill = is_dill(pickler, child=True) + if is_pickler_dill: + # pickler.x -= 1 + # print(pickler.x*' ', 'pop', obj, id(obj)) + i, recursive_cells = pickler._recursive_cells.pop(id(obj)) + # assert i == len(pickler._recursive_cells), 'Stack tampered!' + for t in recursive_cells: + pickler.save_reduce(_setattr_shim, (t, 'cell_contents', obj)) + # pop None off created by _setattr off stack + if PY3: + pickler.write(bytes('0', 'UTF-8')) + else: + pickler.write('0') # pragma: no cover + ### Extend the Picklers class Pickler(StockPickler): """python's Pickler extended to interpreter sessions""" @@ -519,7 +544,6 @@ def __init__(self, *args, **kwds): self._recursive_cells = {} def dump(self, obj): #NOTE: if settings change, need to update attributes - stack.clear() # clear record of 'recursion-sensitive' pickled objects # register if the object is a numpy ufunc # thanks to Paul Kienzle for pointing out ufuncs didn't pickle if NumpyUfuncType and numpyufunc(obj): @@ -564,7 +588,6 @@ def save_numpy_array(pickler, obj): raise PicklingError(msg) else: StockPickler.dump(self, obj) - stack.clear() # clear record of 'recursion-sensitive' pickled objects return dump.__doc__ = StockPickler.dump.__doc__ pass @@ -1144,7 +1167,6 @@ def save_module_dict(pickler, obj): @register(ClassType) def save_classobj(pickler, obj): #FIXME: enable pickler._byref - #stack[id(obj)] = len(stack), obj if obj.__module__ == '__main__': #XXX: use _main_module.__name__ everywhere? log.info("C1: %s" % obj) pickler.save_reduce(ClassType, (obj.__name__, obj.__bases__, @@ -1391,7 +1413,7 @@ def save_cell(pickler, obj): if recursive_cells is not None: log.info("Ce2: %s" % obj) pickler.save_reduce(_create_cell, (_CELL_REF_shim,), obj=obj) - recursive_cells.append(obj) + recursive_cells[1].append(obj) log.info("# Ce2") return log.info("Ce1: %s" % obj) @@ -1559,7 +1581,6 @@ def save_module(pickler, obj): @register(TypeType) def save_type(pickler, obj): - #stack[id(obj)] = len(stack), obj #XXX: probably don't obj in all cases below if obj in _typemap: log.info("T1: %s" % obj) pickler.save_reduce(_load_type, (_typemap[obj],), obj=obj) @@ -1595,20 +1616,11 @@ def save_type(pickler, obj): #print ("%s\n%s" % (obj.__bases__, obj.__dict__)) for name in _dict.get("__slots__", []): del _dict[name] - if pickler_is_dill: - pickler._recursive_cells[id(obj)] = [] name = getattr(obj, "__qualname__", obj.__name__) + _enter_recursive_cell_stack(pickler, obj, pickler_is_dill) pickler.save_reduce(_create_type, (type(obj), name, obj.__bases__, _dict), obj=obj) - if pickler_is_dill: - recursive_cells = pickler._recursive_cells.pop(id(obj)) - for t in recursive_cells: - pickler.save_reduce(_setattr_shim, (t, 'cell_contents', obj)) - # pop None off created by _setattr off stack - if PY3: - pickler.write(bytes('0', 'UTF-8')) - else: - pickler.write('0') # pragma: no cover + _exit_recursive_cell_stack(pickler, obj, pickler_is_dill) log.info("# %s" % _t) # special cases: NoneType, NotImplementedType, EllipsisType elif obj is type(None): @@ -1662,29 +1674,25 @@ def save_classmethod(pickler, obj): def save_function(pickler, obj): if not _locate_function(obj): #, pickler._session): log.info("F1: %s" % obj) - if getattr(pickler, '_recurse', False): + _recursive_cells = getattr(pickler, '_recursive_cells', ()) + _byref = getattr(pickler, '_byref', None) + _recurse = getattr(pickler, '_recurse', None) + _memo = (id(obj) in _recursive_cells) and (_recurse is not None) + if _recurse and not _memo: # recurse to get all globals referred to by obj from .detect import globalvars globs = globalvars(obj, recurse=True, builtin=True) - # remove objects that have already been serialized - #stacktypes = (ClassType, TypeType, FunctionType) - #for key,value in list(globs.items()): - # if isinstance(value, stacktypes) and id(value) in stack: - # del globs[key] - # ABORT: if self-references, use _recurse=False - if id(obj) in stack: # or obj in globs.values(): - globs = obj.__globals__ if PY3 else obj.func_globals else: globs = obj.__globals__ if PY3 else obj.func_globals - _byref = getattr(pickler, '_byref', None) - _recurse = getattr(pickler, '_recurse', None) - _memo = (id(obj) in stack) and (_recurse is not None) - #print("stack: %s + '%s'" % (set(hex(i) for i in stack),hex(id(obj)))) - stack[id(obj)] = len(stack), obj + #print("stack: %s + '%s'" % (set(hex(i) for i in stack),hex(id(obj)))) + # stack[id(obj)] = len(stack), obj + if _memo: + pickler._recurse = False + else: + _enter_recursive_cell_stack(pickler, obj) if PY3: #NOTE: workaround for 'super' (see issue #75) _super = ('super' in getattr(obj.__code__,'co_names',())) and (_byref is not None) - if _memo: pickler._recurse = False fkwdefaults = getattr(obj, '__kwdefaults__', None) pickler.save_reduce(_create_function, (obj.__code__, globs, obj.__name__, @@ -1693,13 +1701,15 @@ def save_function(pickler, obj): else: _super = ('super' in getattr(obj.func_code,'co_names',())) and (_byref is not None) and getattr(pickler, '_recurse', False) if _super: pickler._byref = True - if _memo: pickler._recurse = False pickler.save_reduce(_create_function, (obj.func_code, globs, obj.func_name, obj.func_defaults, obj.func_closure, obj.__dict__), obj=obj) if _super: pickler._byref = _byref - if _memo: pickler._recurse = _recurse + if _memo: + pickler._recurse = _recurse + else: + _exit_recursive_cell_stack(pickler, obj) #clear = (_byref, _super, _recurse, _memo) #print(clear + (OLDER,)) #NOTE: workaround for #234; "partial" still is problematic for recurse diff --git a/tests/test_recursive.py b/tests/test_recursive.py index 454a5674..83e3111d 100644 --- a/tests/test_recursive.py +++ b/tests/test_recursive.py @@ -99,8 +99,19 @@ def test_circular_reference(): assert type(obj4_copy.b) is type(obj4_copy.b).__init__.__closure__[0].cell_contents +def f(): + def g(): + return g + return g + + +def test_function_cells(): + assert dill.copy(f()) + + if __name__ == '__main__': test_super() test_partial() test_partials() test_circular_reference() + test_function_cells() From 50bbc916e47d3521c33058749d1ef31844207db7 Mon Sep 17 00:00:00 2001 From: anivegesana Date: Thu, 23 Dec 2021 17:52:53 -0500 Subject: [PATCH 26/46] Turn on test case for Python 3 only --- tests/test_recursive.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/test_recursive.py b/tests/test_recursive.py index 83e3111d..c6ed757a 100644 --- a/tests/test_recursive.py +++ b/tests/test_recursive.py @@ -106,7 +106,8 @@ def g(): def test_function_cells(): - assert dill.copy(f()) + if PY3: + assert dill.copy(f()) if __name__ == '__main__': From 03fde841c3c902e393bf00baf756ae4f14f0ab71 Mon Sep 17 00:00:00 2001 From: anivegesana Date: Thu, 23 Dec 2021 23:39:38 -0500 Subject: [PATCH 27/46] Cell manipulation on PyPy 2.7 --- dill/_dill.py | 67 ++++++++++++++++++++++++++++++++--------- tests/test_recursive.py | 7 ++--- 2 files changed, 56 insertions(+), 18 deletions(-) diff --git a/dill/_dill.py b/dill/_dill.py index 55ad0f73..6e092189 100644 --- a/dill/_dill.py +++ b/dill/_dill.py @@ -105,6 +105,8 @@ def _trace(boolean): except ImportError: HAS_CTYPES = False IS_PYPY = False +if IS_PYPY: + import __pypy__ IS_PYPY2 = IS_PYPY and not PY3 NumpyUfuncType = None NumpyDType = None @@ -446,7 +448,7 @@ def load_session(filename='/tmp/session.pkl', main=None, **kwds): ### End: Pickle the Interpreter -class sentinel: +class sentinel(object): """ Create a unique sentinel object that is pickled as a constant. """ @@ -459,9 +461,11 @@ def __copy__(self): def __deepcopy__(self, memo): return self def __reduce__(self): - return repr(self) + return self.name + def __reduce_ex__(self, protocol): + return self.name -class BuiltinShim: +class BuiltinShim(object): """ Refers to a shim function in dill._dill if it exists and to a builtin function if it doesn't exist. This choice is made during the unpickle @@ -479,6 +483,8 @@ def __call__(self, *args, **kwargs): return getattr(globals(), shim_name, builtin)(*args, **kwargs) def __reduce__(self): return (getattr, (sys.modules[__name__], self.shim_name, self.builtin)) + def __reduce_ex__(self, protocol): + return self.__reduce__() class MetaCatchingDict(dict): def get(self, key, default=None): @@ -947,12 +953,12 @@ def __getattribute__(self, attr): if OLD37 and PY3: # Python 3.0 to 3.6 is in a weird case, where it is possible to pickle # recursive cells, we can't assign directly to the cell. - __nonlocal = ('nonlocal',) if PY3 else ('x = ',) + __nonlocal = ('nonlocal contents',) if PY3 else ('',) exec('''def _create_cell(contents=_CELL_REF): if contents is _CELL_REF: contents = None def updater(value): - %s contents + %s if value is _CELL_REF: del contents else: @@ -971,13 +977,46 @@ def _delattr(object, name): return object.cell_contents(_CELL_REF) else: delattr(object, name) -else: - if PY3: - def _create_cell(contents=_CELL_REF): - return (lambda: contents).__closure__[0] - else: - def _create_cell(contents=_CELL_REF): - return (lambda: contents).func_closure[0] + +elif PY3: + def _create_cell(contents=_CELL_REF): + return (lambda: contents).__closure__[0] + +elif IS_PYPY2: + def _create_cell(contents=_CELL_REF): + return (lambda: contents).func_closure[0] + + def _setattr(object, name, value): + if type(object) is CellType and name == 'cell_contents': + __pypy__.internal_repr(object).set(value) + else: + setattr(object, name, value) + + def _delattr(object, name): + if type(object) is CellType and name == 'cell_contents': + __pypy__.internal_repr(object).delete() + else: + delattr(object, name) + +else: # CPython 2 + def _create_cell(contents=_CELL_REF): + return (lambda: contents).func_closure[0] + + _PyCell_Set = ctypes.pythonapi.PyCell_Set + + def _setattr(object, name, value): + if type(object) is CellType and name == 'cell_contents': + _PyCell_Set.argtypes = (ctypes.py_object, ctypes.py_object) + _PyCell_Set(object, value) + else: + setattr(object, name, value) + + def _delattr(object, name): + if type(object) is CellType and name == 'cell_contents': + _PyCell_Set.argtypes = (ctypes.py_object, ctypes.c_void_p) + _PyCell_Set(object, None) + else: + delattr(object, name) _setattr_shim = BuiltinShim('_setattr', setattr) _delattr_shim = BuiltinShim('_delattr', delattr) @@ -1408,7 +1447,7 @@ def save_cell(pickler, obj): pickler.write('0') # pragma: no cover log.info("# Ce3") return - if PY3 and is_dill(pickler, child=True): + if is_dill(pickler, child=True): recursive_cells = pickler._recursive_cells.get(id(f)) if recursive_cells is not None: log.info("Ce2: %s" % obj) @@ -1568,7 +1607,7 @@ def save_module(pickler, obj): pickler.save_reduce(_import_module, (obj.__name__,), obj=obj, state=_main_dict) log.info("# M1") - elif obj.__name__ == "dill._dill": + elif PY3 and obj.__name__ == "dill._dill": log.info("M2: %s" % obj) pickler.save_global(obj) log.info("# M2") diff --git a/tests/test_recursive.py b/tests/test_recursive.py index c6ed757a..ecd4536f 100644 --- a/tests/test_recursive.py +++ b/tests/test_recursive.py @@ -92,9 +92,9 @@ def __init__(self): def test_circular_reference(): + assert dill.copy(obj4()) + obj4_copy = dill.loads(dill.dumps(obj4())) if PY3: - assert dill.copy(obj4()) - obj4_copy = dill.loads(dill.dumps(obj4())) assert type(obj4_copy) is type(obj4_copy).__init__.__closure__[0].cell_contents assert type(obj4_copy.b) is type(obj4_copy.b).__init__.__closure__[0].cell_contents @@ -106,8 +106,7 @@ def g(): def test_function_cells(): - if PY3: - assert dill.copy(f()) + assert dill.copy(f()) if __name__ == '__main__': From c0341262a57bffa095f1744c4c7e0ca156792b0c Mon Sep 17 00:00:00 2001 From: anivegesana Date: Fri, 24 Dec 2021 00:30:58 -0500 Subject: [PATCH 28/46] Not possible in PyPy 2.7 --- dill/_dill.py | 6 +++--- tests/test_recursive.py | 5 +++-- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/dill/_dill.py b/dill/_dill.py index 6e092189..6a01804c 100644 --- a/dill/_dill.py +++ b/dill/_dill.py @@ -988,13 +988,13 @@ def _create_cell(contents=_CELL_REF): def _setattr(object, name, value): if type(object) is CellType and name == 'cell_contents': - __pypy__.internal_repr(object).set(value) + raise SyntaxError('Not possible to edit a cell in PyPy2') else: setattr(object, name, value) def _delattr(object, name): if type(object) is CellType and name == 'cell_contents': - __pypy__.internal_repr(object).delete() + raise SyntaxError('Not possible to edit a cell in PyPy2') else: delattr(object, name) @@ -1447,7 +1447,7 @@ def save_cell(pickler, obj): pickler.write('0') # pragma: no cover log.info("# Ce3") return - if is_dill(pickler, child=True): + if not IS_PYPY2 and is_dill(pickler, child=True): recursive_cells = pickler._recursive_cells.get(id(f)) if recursive_cells is not None: log.info("Ce2: %s" % obj) diff --git a/tests/test_recursive.py b/tests/test_recursive.py index ecd4536f..0ba814d4 100644 --- a/tests/test_recursive.py +++ b/tests/test_recursive.py @@ -6,7 +6,7 @@ # - https://github.com/uqfoundation/dill/blob/master/LICENSE import dill -from dill._dill import PY3 +from dill._dill import PY3, IS_PYPY2 from functools import partial class obj1(object): @@ -106,7 +106,8 @@ def g(): def test_function_cells(): - assert dill.copy(f()) + if not IS_PYPY2: + assert dill.copy(f()) if __name__ == '__main__': From 2921143a231f48f6274fc89d69d17f4fb93ecba3 Mon Sep 17 00:00:00 2001 From: anivegesana Date: Fri, 24 Dec 2021 00:37:26 -0500 Subject: [PATCH 29/46] Correctly remove test case --- tests/test_recursive.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tests/test_recursive.py b/tests/test_recursive.py index 0ba814d4..45f0aeaf 100644 --- a/tests/test_recursive.py +++ b/tests/test_recursive.py @@ -92,8 +92,9 @@ def __init__(self): def test_circular_reference(): - assert dill.copy(obj4()) - obj4_copy = dill.loads(dill.dumps(obj4())) + if not IS_PYPY2: + assert dill.copy(obj4()) + obj4_copy = dill.loads(dill.dumps(obj4())) if PY3: assert type(obj4_copy) is type(obj4_copy).__init__.__closure__[0].cell_contents assert type(obj4_copy.b) is type(obj4_copy.b).__init__.__closure__[0].cell_contents From fc711c83bc623994ea9226fcfaf7510507043d7c Mon Sep 17 00:00:00 2001 From: anivegesana Date: Fri, 24 Dec 2021 00:49:18 -0500 Subject: [PATCH 30/46] Fix coverage --- dill/_dill.py | 14 ++++++-------- tests/test_functions.py | 7 ++++++- 2 files changed, 12 insertions(+), 9 deletions(-) diff --git a/dill/_dill.py b/dill/_dill.py index 6a01804c..cd11ada6 100644 --- a/dill/_dill.py +++ b/dill/_dill.py @@ -105,8 +105,6 @@ def _trace(boolean): except ImportError: HAS_CTYPES = False IS_PYPY = False -if IS_PYPY: - import __pypy__ IS_PYPY2 = IS_PYPY and not PY3 NumpyUfuncType = None NumpyDType = None @@ -455,11 +453,11 @@ class sentinel(object): def __init__(self, name): self.name = name def __repr__(self): - return __name__ + '.' + self.name + return __name__ + '.' + self.name # pragma: no cover def __copy__(self): - return self + return self # pragma: no cover def __deepcopy__(self, memo): - return self + return self # pragma: no cover def __reduce__(self): return self.name def __reduce_ex__(self, protocol): @@ -476,11 +474,11 @@ def __init__(self, shim_name, builtin): self.builtin = builtin self.__call__ = getattr(globals(), shim_name, builtin) def __copy__(self): - return self + return self # pragma: no cover def __deepcopy__(self, memo): - return self + return self # pragma: no cover def __call__(self, *args, **kwargs): - return getattr(globals(), shim_name, builtin)(*args, **kwargs) + return getattr(globals(), shim_name, builtin)(*args, **kwargs) # pragma: no cover def __reduce__(self): return (getattr, (sys.modules[__name__], self.shim_name, self.builtin)) def __reduce_ex__(self, protocol): diff --git a/tests/test_functions.py b/tests/test_functions.py index 367e0646..f93598a2 100644 --- a/tests/test_functions.py +++ b/tests/test_functions.py @@ -7,6 +7,7 @@ import dill import sys +import platform dill.settings['recurse'] = True @@ -73,7 +74,11 @@ def test_functions(): assert dill.loads(dumped_func_e)(1, 2, e2=3, e3=4) == 10 assert dill.loads(dumped_func_e)(1, 2, 3, e2=4) == 12 assert dill.loads(dumped_func_e)(1, 2, 3, e2=4, e3=5) == 15''') - + elif platform.python_implementation() == 'CPython': + empty_cell = dill._dill._create_cell() + dill._dill._delattr(empty_cell, 'cell_contents') + cell_copy = dill.loads(dill.dumps(empty_cell)) + assert 'empty' in str(cell_copy) if __name__ == '__main__': test_functions() From 5568e79797c68e110c9e3899a29bc773fd0a04c7 Mon Sep 17 00:00:00 2001 From: anivegesana Date: Fri, 24 Dec 2021 14:03:41 -0500 Subject: [PATCH 31/46] PyPy 2.7 Attempt 3 --- dill/_dill.py | 133 ++++++++++++++++++++-------------------- dill/nonlocals.py | 122 ++++++++++++++++++++++++++++++++++++ tests/test_recursive.py | 8 +-- 3 files changed, 193 insertions(+), 70 deletions(-) create mode 100644 dill/nonlocals.py diff --git a/dill/_dill.py b/dill/_dill.py index cd11ada6..1e93ebf1 100644 --- a/dill/_dill.py +++ b/dill/_dill.py @@ -940,81 +940,84 @@ def __getattribute__(self, attr): return type(self)(attrs, index) -# A sentinel object to signal that the cell that is going to be created -# is either a reference to a value that isn't created yet and will be -# updated if passed into _create_cell or is a cell that is genuinely -# empty if passed into updater. -# Can be safely replaced with None once breaking changes are allowed. -_CELL_REF = sentinel('_CELL_REF') -_CELL_REF_shim = BuiltinShim('_CELL_REF', None) - -if OLD37 and PY3: - # Python 3.0 to 3.6 is in a weird case, where it is possible to pickle - # recursive cells, we can't assign directly to the cell. - __nonlocal = ('nonlocal contents',) if PY3 else ('',) - exec('''def _create_cell(contents=_CELL_REF): - if contents is _CELL_REF: - contents = None - def updater(value): - %s - if value is _CELL_REF: - del contents - else: - contents = value - contents = updater - return (lambda: contents).__closure__[0]''' % __nonlocal) - def _setattr(object, name, value): - if type(object) is CellType and name == 'cell_contents': - object.cell_contents(value) - else: - setattr(object, name, value) +# Used to stay compatible with versions of dill whose _create_cell functions +# do not have a default value. +# Can be safely replaced removed entirely (replaced by empty tuples for calls to +# _create_cell) once breaking changes are allowed. +_CELL_REF_shim = None - def _delattr(object, name): - if type(object) is CellType and name == 'cell_contents': - return object.cell_contents(_CELL_REF) - else: - delattr(object, name) - -elif PY3: - def _create_cell(contents=_CELL_REF): +if PY3: + def _create_cell(contents=None): return (lambda: contents).__closure__[0] -elif IS_PYPY2: - def _create_cell(contents=_CELL_REF): + if OLD37: + # Python 3.0 to 3.6 is in a weird case, where it is possible to pickle + # recursive cells, we can't assign directly to the cell. + # https://stackoverflow.com/a/59276835 + __nonlocal = ('nonlocal cell',) if PY3 else ('',) + exec('''def _setattr(cell, name, value): + if type(cell) is CellType and name == 'cell_contents': + def cell_setter(value): + %s + cell = value # pylint: disable=unused-variable + func = FunctionType(cell_setter.__code__, globals(), "", None, (cell,)) # same as cell_setter, but with cell being the cell's contents + func(value) + else: + setattr(cell, name, value)''' % __nonlocal) + + exec('''def _delattr(cell, name): + if type(cell) is CellType and name == 'cell_contents': + def cell_deleter(value): + %s + del cell # pylint: disable=unused-variable + func = FunctionType(cell_deleter.__code__, globals(), "", None, (cell,)) # same as cell_deleter, but with cell being the cell's contents + func(value) + else: + delattr(cell, name)''' % __nonlocal) + +else: + def _create_cell(contents=None): return (lambda: contents).func_closure[0] - def _setattr(object, name, value): - if type(object) is CellType and name == 'cell_contents': - raise SyntaxError('Not possible to edit a cell in PyPy2') - else: - setattr(object, name, value) + if IS_PYPY: + from . import nonlocals as _nonlocals + @_nonlocals.export_nonlocals('cellv') + def _setattr(cell, name, value): + if type(cell) is CellType and name == 'cell_contents': + cellv = None + @_nonlocals.nonlocals('cellv', closure_override=(cell,)) + def cell_setter(value): + cellv = value # pylint: disable=unused-variable + cell_setter(value) + else: + setattr(cell, name, value) - def _delattr(object, name): - if type(object) is CellType and name == 'cell_contents': - raise SyntaxError('Not possible to edit a cell in PyPy2') - else: - delattr(object, name) + def _delattr(object, name): + if type(object) is CellType and name == 'cell_contents': + raise NotImplementedError('Empty cells in PyPy2') + else: + delattr(object, name) -else: # CPython 2 - def _create_cell(contents=_CELL_REF): - return (lambda: contents).func_closure[0] + else: # CPython 2 + def _create_cell(contents=None): + return (lambda: contents).func_closure[0] - _PyCell_Set = ctypes.pythonapi.PyCell_Set + _PyCell_Set = ctypes.pythonapi.PyCell_Set - def _setattr(object, name, value): - if type(object) is CellType and name == 'cell_contents': - _PyCell_Set.argtypes = (ctypes.py_object, ctypes.py_object) - _PyCell_Set(object, value) - else: - setattr(object, name, value) + def _setattr(object, name, value): + if type(object) is CellType and name == 'cell_contents': + _PyCell_Set.argtypes = (ctypes.py_object, ctypes.py_object) + _PyCell_Set(object, value) + else: + setattr(object, name, value) - def _delattr(object, name): - if type(object) is CellType and name == 'cell_contents': - _PyCell_Set.argtypes = (ctypes.py_object, ctypes.c_void_p) - _PyCell_Set(object, None) - else: - delattr(object, name) + def _delattr(object, name): + if type(object) is CellType and name == 'cell_contents': + _PyCell_Set.argtypes = (ctypes.py_object, ctypes.c_void_p) + _PyCell_Set(object, None) + else: + delattr(object, name) _setattr_shim = BuiltinShim('_setattr', setattr) _delattr_shim = BuiltinShim('_delattr', delattr) @@ -1445,7 +1448,7 @@ def save_cell(pickler, obj): pickler.write('0') # pragma: no cover log.info("# Ce3") return - if not IS_PYPY2 and is_dill(pickler, child=True): + if is_dill(pickler, child=True): recursive_cells = pickler._recursive_cells.get(id(f)) if recursive_cells is not None: log.info("Ce2: %s" % obj) diff --git a/dill/nonlocals.py b/dill/nonlocals.py new file mode 100644 index 00000000..24150b9f --- /dev/null +++ b/dill/nonlocals.py @@ -0,0 +1,122 @@ +# This file is only needed for PyPy2. It can be removed when support is dropped. +# https://code.activestate.com/recipes/578965-python-2-nonlocal/ + +import inspect, types, dis + +__all__ = ['export_nonlocals', 'nonlocals'] + +# http://www.jonathon-vogel.com/posts/patching_function_bytecode_with_python/ +def find_code(code, f): + i = 0 + while i < len(code): + if f(code, i): + return i + elif code[i] < dis.HAVE_ARGUMENT: + i += 1 + else: + i += 3 + +# http://nedbatchelder.com/blog/201301/byterun_and_making_cells.html +def make_cell(value): + return (lambda x: lambda: x)(value).func_closure[0] + +globals().update(dis.opmap) + +def export_nonlocals(*vars): + def func(f): + code = map(ord, f.func_code.co_code) + varnames = list(f.func_code.co_varnames) + names = list(f.func_code.co_names) + cf=lambda c,i:c[i] in (LOAD_FAST,STORE_FAST) and varnames[c[i+1]] in vars + while True: + idx = find_code(code, cf) + if idx is None: + break + code[idx] = LOAD_NAME if code[idx] == LOAD_FAST else STORE_NAME + var = varnames[code[idx+1]] + code[idx+1] = len(names) + try: + code[idx+1] = names.index(var) + except ValueError: + names.append(var) + for i, var in enumerate(filter(varnames.__contains__, names)): + varnames[varnames.index(var)] = '__anon_var_%d' % i + rescode = types.CodeType(f.func_code.co_argcount, f.func_code.co_nlocals, + f.func_code.co_stacksize, + f.func_code.co_flags^0x01, + ''.join(map(chr, code)), f.func_code.co_consts, + tuple(names), tuple(varnames), + f.func_code.co_filename, f.func_code.co_name, + f.func_code.co_firstlineno, + f.func_code.co_lnotab, f.func_code.co_freevars, + f.func_code.co_cellvars) + return types.FunctionType(rescode, dict(f.func_globals, __ns=True), + f.func_name, f.func_defaults, f.func_closure) + return func + +def nonlocals(*vars, **kwargs): + def func(f): + caller = inspect.stack()[1][0] + caller_vars = caller.f_globals + caller_vars.update(caller.f_locals) + code = map(ord, f.func_code.co_code) + varmap = {} + freevars = list(f.func_code.co_freevars) + freec = len(freevars) + freeoffs = len(f.func_code.co_cellvars) + varnames = list(f.func_code.co_varnames) + closure = list(f.func_closure or []) + names = list(f.func_code.co_names) + consts = list(f.func_code.co_consts) + fglobals = {'__nonlocal_plocals': caller.f_locals} + names.extend(fglobals.keys()) + plocals_pos = len(names)-1 + offs = 0 + closure_override = kwargs.get('closure_override', None) + def cf(c, i): + if c[i] in (LOAD_FAST, STORE_FAST) and varnames[c[i+1]] in vars: + return True + elif c[i] in dis.hasjabs: + c[i+1] += offs + while True: + idx = find_code(code, cf) + if idx is None: + break + code[idx] = LOAD_DEREF if code[idx] == LOAD_FAST else STORE_DEREF + var = varnames[code[idx+1]] + code[idx+1] = len(freevars) + try: + code[idx+1] = freevars.index(var) + except ValueError: + freevars.append(var) + code[idx+1] += freeoffs + if code[idx] == STORE_DEREF and caller_vars.get('__ns') == True: + const_id = len(consts) + try: + const_id = consts.index(var) + except ValueError: + consts.append(var) + code.insert(idx, DUP_TOP) + code[idx+4:idx+4] = [ + LOAD_GLOBAL, plocals_pos, 0, + LOAD_CONST, const_id, 0, + STORE_SUBSCR + ] + offs += 4 + nlocals = len(freevars)-freec+f.func_code.co_nlocals + if closure_override is None: + closure.extend(map(make_cell, map(caller_vars.__getitem__, + freevars[freec:]))) + else: + closure.extend(closure_override) + rescode = types.CodeType(f.func_code.co_argcount, nlocals, + f.func_code.co_stacksize, f.func_code.co_flags, + ''.join(map(chr, code)), tuple(consts), + tuple(names), tuple(varnames), + f.func_code.co_filename, f.func_code.co_name, + f.func_code.co_firstlineno, + f.func_code.co_lnotab, tuple(freevars), + f.func_code.co_cellvars) + return types.FunctionType(rescode, dict(f.func_globals, **fglobals), + f.func_name, f.func_defaults, tuple(closure)) + return func diff --git a/tests/test_recursive.py b/tests/test_recursive.py index 45f0aeaf..8287cba3 100644 --- a/tests/test_recursive.py +++ b/tests/test_recursive.py @@ -92,9 +92,8 @@ def __init__(self): def test_circular_reference(): - if not IS_PYPY2: - assert dill.copy(obj4()) - obj4_copy = dill.loads(dill.dumps(obj4())) + assert dill.copy(obj4()) + obj4_copy = dill.loads(dill.dumps(obj4())) if PY3: assert type(obj4_copy) is type(obj4_copy).__init__.__closure__[0].cell_contents assert type(obj4_copy.b) is type(obj4_copy.b).__init__.__closure__[0].cell_contents @@ -107,8 +106,7 @@ def g(): def test_function_cells(): - if not IS_PYPY2: - assert dill.copy(f()) + assert dill.copy(f()) if __name__ == '__main__': From 9ae9552fdc2367d6bdae96534077afc41e5e7b1c Mon Sep 17 00:00:00 2001 From: anivegesana Date: Fri, 24 Dec 2021 14:08:47 -0500 Subject: [PATCH 32/46] Fix small issue --- dill/_dill.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/dill/_dill.py b/dill/_dill.py index 1e93ebf1..323726bf 100644 --- a/dill/_dill.py +++ b/dill/_dill.py @@ -523,7 +523,7 @@ def _exit_recursive_cell_stack(pickler, obj, is_pickler_dill=None): if PY3: pickler.write(bytes('0', 'UTF-8')) else: - pickler.write('0') # pragma: no cover + pickler.write('0') ### Extend the Picklers class Pickler(StockPickler): @@ -968,11 +968,11 @@ def cell_setter(value): exec('''def _delattr(cell, name): if type(cell) is CellType and name == 'cell_contents': - def cell_deleter(value): + def cell_deleter(): %s del cell # pylint: disable=unused-variable func = FunctionType(cell_deleter.__code__, globals(), "", None, (cell,)) # same as cell_deleter, but with cell being the cell's contents - func(value) + func() else: delattr(cell, name)''' % __nonlocal) @@ -1445,7 +1445,7 @@ def save_cell(pickler, obj): if PY3: pickler.write(bytes('0', 'UTF-8')) else: - pickler.write('0') # pragma: no cover + pickler.write('0') log.info("# Ce3") return if is_dill(pickler, child=True): From fe842e074d6ce03dd4a5ab29b9a278b8b6446e7c Mon Sep 17 00:00:00 2001 From: anivegesana Date: Sat, 25 Dec 2021 12:28:27 -0500 Subject: [PATCH 33/46] Clean up _create_cell --- dill/_dill.py | 78 ++++++++++++++++++++++++++------------------------- 1 file changed, 40 insertions(+), 38 deletions(-) diff --git a/dill/_dill.py b/dill/_dill.py index 323726bf..498a1d58 100644 --- a/dill/_dill.py +++ b/dill/_dill.py @@ -519,7 +519,7 @@ def _exit_recursive_cell_stack(pickler, obj, is_pickler_dill=None): # assert i == len(pickler._recursive_cells), 'Stack tampered!' for t in recursive_cells: pickler.save_reduce(_setattr_shim, (t, 'cell_contents', obj)) - # pop None off created by _setattr off stack + # pop None created by _setattr off stack if PY3: pickler.write(bytes('0', 'UTF-8')) else: @@ -951,11 +951,38 @@ def __getattribute__(self, attr): def _create_cell(contents=None): return (lambda: contents).__closure__[0] - if OLD37: - # Python 3.0 to 3.6 is in a weird case, where it is possible to pickle - # recursive cells, we can't assign directly to the cell. +else: + def _create_cell(contents=None): + return (lambda: contents).func_closure[0] + + +if OLD37: + if not IS_PYPY and hasattr(ctypes.pythonapi, 'PyCell_Set'): + # CPython + + _PyCell_Set = ctypes.pythonapi.PyCell_Set + + def _setattr(object, name, value): + if type(object) is CellType and name == 'cell_contents': + _PyCell_Set.argtypes = (ctypes.py_object, ctypes.py_object) + _PyCell_Set(object, value) + else: + setattr(object, name, value) + + def _delattr(object, name): + if type(object) is CellType and name == 'cell_contents': + _PyCell_Set.argtypes = (ctypes.py_object, ctypes.c_void_p) + _PyCell_Set(object, None) + else: + delattr(object, name) + + # General Python (not CPython) up to 3.6 is in a weird case, where it is + # possible to pickle recursive cells, but we can't assign directly to the + # cell. + elif PY3: + # Use nonlocal variables to reassign the cell value. # https://stackoverflow.com/a/59276835 - __nonlocal = ('nonlocal cell',) if PY3 else ('',) + __nonlocal = ('nonlocal cell',) exec('''def _setattr(cell, name, value): if type(cell) is CellType and name == 'cell_contents': def cell_setter(value): @@ -976,11 +1003,9 @@ def cell_deleter(): else: delattr(cell, name)''' % __nonlocal) -else: - def _create_cell(contents=None): - return (lambda: contents).func_closure[0] - - if IS_PYPY: + else: + # Likely PyPy 2.7. Simulate the nonlocal keyword with bytecode + # manipulation. from . import nonlocals as _nonlocals @_nonlocals.export_nonlocals('cellv') def _setattr(cell, name, value): @@ -993,31 +1018,8 @@ def cell_setter(value): else: setattr(cell, name, value) - def _delattr(object, name): - if type(object) is CellType and name == 'cell_contents': - raise NotImplementedError('Empty cells in PyPy2') - else: - delattr(object, name) - - else: # CPython 2 - def _create_cell(contents=None): - return (lambda: contents).func_closure[0] - - _PyCell_Set = ctypes.pythonapi.PyCell_Set - - def _setattr(object, name, value): - if type(object) is CellType and name == 'cell_contents': - _PyCell_Set.argtypes = (ctypes.py_object, ctypes.py_object) - _PyCell_Set(object, value) - else: - setattr(object, name, value) - - def _delattr(object, name): - if type(object) is CellType and name == 'cell_contents': - _PyCell_Set.argtypes = (ctypes.py_object, ctypes.c_void_p) - _PyCell_Set(object, None) - else: - delattr(object, name) + # Empty cells are not possible in this case. When unpickling, this + # case will throw an error that 'cell_contents' is get only _setattr_shim = BuiltinShim('_setattr', setattr) _delattr_shim = BuiltinShim('_delattr', delattr) @@ -1440,8 +1442,10 @@ def save_cell(pickler, obj): except: log.info("Ce3: %s" % obj) pickler.save_reduce(_create_cell, (_CELL_REF_shim,), obj=obj) + # Call the function _delattr on the cell's cell_contents attribute + # The result of this function call will be None pickler.save_reduce(_delattr_shim, (obj, 'cell_contents')) - # pop None off created by _delattr off stack + # pop None created by calling _delattr off stack if PY3: pickler.write(bytes('0', 'UTF-8')) else: @@ -1724,8 +1728,6 @@ def save_function(pickler, obj): globs = globalvars(obj, recurse=True, builtin=True) else: globs = obj.__globals__ if PY3 else obj.func_globals - #print("stack: %s + '%s'" % (set(hex(i) for i in stack),hex(id(obj)))) - # stack[id(obj)] = len(stack), obj if _memo: pickler._recurse = False else: From ac943212fbb8d17c0b4b9cf540ea82d479795b95 Mon Sep 17 00:00:00 2001 From: anivegesana Date: Sat, 25 Dec 2021 16:51:44 -0500 Subject: [PATCH 34/46] Empty cells in PyPy2 --- dill/_dill.py | 18 +++++++++++++++--- tests/test_functions.py | 4 ++-- 2 files changed, 17 insertions(+), 5 deletions(-) diff --git a/dill/_dill.py b/dill/_dill.py index 498a1d58..b5c46602 100644 --- a/dill/_dill.py +++ b/dill/_dill.py @@ -945,8 +945,11 @@ def __getattribute__(self, attr): # do not have a default value. # Can be safely replaced removed entirely (replaced by empty tuples for calls to # _create_cell) once breaking changes are allowed. +_CELL_EMPTY = sentinel('_CELL_EMPTY') +_CELL_EMPTY_shim = BuiltinShim('_CELL_EMPTY', None) _CELL_REF_shim = None + if PY3: def _create_cell(contents=None): return (lambda: contents).__closure__[0] @@ -1006,6 +1009,11 @@ def cell_deleter(): else: # Likely PyPy 2.7. Simulate the nonlocal keyword with bytecode # manipulation. + def _create_cell(contents=None): + if contents is not _CELL_EMPTY: + value = contents + return (lambda: value).func_closure[0] + from . import nonlocals as _nonlocals @_nonlocals.export_nonlocals('cellv') def _setattr(cell, name, value): @@ -1018,8 +1026,12 @@ def cell_setter(value): else: setattr(cell, name, value) - # Empty cells are not possible in this case. When unpickling, this - # case will throw an error that 'cell_contents' is get only + def _delattr(cell, name): + if type(cell) is CellType and name == 'cell_contents': + pass + else: + delattr(cell, name) + _setattr_shim = BuiltinShim('_setattr', setattr) _delattr_shim = BuiltinShim('_delattr', delattr) @@ -1441,7 +1453,7 @@ def save_cell(pickler, obj): f = obj.cell_contents except: log.info("Ce3: %s" % obj) - pickler.save_reduce(_create_cell, (_CELL_REF_shim,), obj=obj) + pickler.save_reduce(_create_cell, (_CELL_EMPTY_shim,), obj=obj) # Call the function _delattr on the cell's cell_contents attribute # The result of this function call will be None pickler.save_reduce(_delattr_shim, (obj, 'cell_contents')) diff --git a/tests/test_functions.py b/tests/test_functions.py index f93598a2..fed9d55e 100644 --- a/tests/test_functions.py +++ b/tests/test_functions.py @@ -74,8 +74,8 @@ def test_functions(): assert dill.loads(dumped_func_e)(1, 2, e2=3, e3=4) == 10 assert dill.loads(dumped_func_e)(1, 2, 3, e2=4) == 12 assert dill.loads(dumped_func_e)(1, 2, 3, e2=4, e3=5) == 15''') - elif platform.python_implementation() == 'CPython': - empty_cell = dill._dill._create_cell() + else: + empty_cell = dill._dill._create_cell(dill._dill._CELL_EMPTY) dill._dill._delattr(empty_cell, 'cell_contents') cell_copy = dill.loads(dill.dumps(empty_cell)) assert 'empty' in str(cell_copy) From 5b60c926111cd28c5ac8c01df2fccfc564a79b68 Mon Sep 17 00:00:00 2001 From: anivegesana Date: Sun, 26 Dec 2021 15:49:06 -0500 Subject: [PATCH 35/46] Only two _create_cell functions --- dill/_dill.py | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/dill/_dill.py b/dill/_dill.py index b5c46602..cbea46c2 100644 --- a/dill/_dill.py +++ b/dill/_dill.py @@ -956,7 +956,9 @@ def _create_cell(contents=None): else: def _create_cell(contents=None): - return (lambda: contents).func_closure[0] + if contents is not _CELL_EMPTY: + value = contents + return (lambda: value).func_closure[0] if OLD37: @@ -1009,11 +1011,6 @@ def cell_deleter(): else: # Likely PyPy 2.7. Simulate the nonlocal keyword with bytecode # manipulation. - def _create_cell(contents=None): - if contents is not _CELL_EMPTY: - value = contents - return (lambda: value).func_closure[0] - from . import nonlocals as _nonlocals @_nonlocals.export_nonlocals('cellv') def _setattr(cell, name, value): From 71c9aaacf53de3a34e50a65e89da06b11cb1219a Mon Sep 17 00:00:00 2001 From: anivegesana Date: Mon, 27 Dec 2021 20:10:16 -0500 Subject: [PATCH 36/46] Fixes from review --- dill/_dill.py | 131 +-------------------- dill/{nonlocals.py => _nonlocals.py} | 0 dill/detect.py | 22 +++- dill/shims.py | 165 +++++++++++++++++++++++++++ tests/test_functions.py | 29 +++-- tests/test_recursive.py | 2 +- 6 files changed, 203 insertions(+), 146 deletions(-) rename dill/{nonlocals.py => _nonlocals.py} (100%) create mode 100644 dill/shims.py diff --git a/dill/_dill.py b/dill/_dill.py index cbea46c2..4e3d2909 100644 --- a/dill/_dill.py +++ b/dill/_dill.py @@ -259,6 +259,7 @@ def get_file_type(*args, **kwargs): try: ExitType = type(exit) # apparently 'exit' can be removed except NameError: ExitType = None singletontypes = [] +from . import shims ### File modes #: Pickles the file handle, preserving mode. The position of the unpickled @@ -446,44 +447,6 @@ def load_session(filename='/tmp/session.pkl', main=None, **kwds): ### End: Pickle the Interpreter -class sentinel(object): - """ - Create a unique sentinel object that is pickled as a constant. - """ - def __init__(self, name): - self.name = name - def __repr__(self): - return __name__ + '.' + self.name # pragma: no cover - def __copy__(self): - return self # pragma: no cover - def __deepcopy__(self, memo): - return self # pragma: no cover - def __reduce__(self): - return self.name - def __reduce_ex__(self, protocol): - return self.name - -class BuiltinShim(object): - """ - Refers to a shim function in dill._dill if it exists and to a builtin - function if it doesn't exist. This choice is made during the unpickle - step instead of the pickling process. - """ - def __init__(self, shim_name, builtin): - self.shim_name = shim_name - self.builtin = builtin - self.__call__ = getattr(globals(), shim_name, builtin) - def __copy__(self): - return self # pragma: no cover - def __deepcopy__(self, memo): - return self # pragma: no cover - def __call__(self, *args, **kwargs): - return getattr(globals(), shim_name, builtin)(*args, **kwargs) # pragma: no cover - def __reduce__(self): - return (getattr, (sys.modules[__name__], self.shim_name, self.builtin)) - def __reduce_ex__(self, protocol): - return self.__reduce__() - class MetaCatchingDict(dict): def get(self, key, default=None): try: @@ -518,7 +481,7 @@ def _exit_recursive_cell_stack(pickler, obj, is_pickler_dill=None): i, recursive_cells = pickler._recursive_cells.pop(id(obj)) # assert i == len(pickler._recursive_cells), 'Stack tampered!' for t in recursive_cells: - pickler.save_reduce(_setattr_shim, (t, 'cell_contents', obj)) + pickler.save_reduce(shims._setattr, (t, 'cell_contents', obj)) # pop None created by _setattr off stack if PY3: pickler.write(bytes('0', 'UTF-8')) @@ -939,17 +902,6 @@ def __getattribute__(self, attr): attrs[index] = ".".join([attrs[index], attr]) return type(self)(attrs, index) - - -# Used to stay compatible with versions of dill whose _create_cell functions -# do not have a default value. -# Can be safely replaced removed entirely (replaced by empty tuples for calls to -# _create_cell) once breaking changes are allowed. -_CELL_EMPTY = sentinel('_CELL_EMPTY') -_CELL_EMPTY_shim = BuiltinShim('_CELL_EMPTY', None) -_CELL_REF_shim = None - - if PY3: def _create_cell(contents=None): return (lambda: contents).__closure__[0] @@ -961,79 +913,6 @@ def _create_cell(contents=None): return (lambda: value).func_closure[0] -if OLD37: - if not IS_PYPY and hasattr(ctypes.pythonapi, 'PyCell_Set'): - # CPython - - _PyCell_Set = ctypes.pythonapi.PyCell_Set - - def _setattr(object, name, value): - if type(object) is CellType and name == 'cell_contents': - _PyCell_Set.argtypes = (ctypes.py_object, ctypes.py_object) - _PyCell_Set(object, value) - else: - setattr(object, name, value) - - def _delattr(object, name): - if type(object) is CellType and name == 'cell_contents': - _PyCell_Set.argtypes = (ctypes.py_object, ctypes.c_void_p) - _PyCell_Set(object, None) - else: - delattr(object, name) - - # General Python (not CPython) up to 3.6 is in a weird case, where it is - # possible to pickle recursive cells, but we can't assign directly to the - # cell. - elif PY3: - # Use nonlocal variables to reassign the cell value. - # https://stackoverflow.com/a/59276835 - __nonlocal = ('nonlocal cell',) - exec('''def _setattr(cell, name, value): - if type(cell) is CellType and name == 'cell_contents': - def cell_setter(value): - %s - cell = value # pylint: disable=unused-variable - func = FunctionType(cell_setter.__code__, globals(), "", None, (cell,)) # same as cell_setter, but with cell being the cell's contents - func(value) - else: - setattr(cell, name, value)''' % __nonlocal) - - exec('''def _delattr(cell, name): - if type(cell) is CellType and name == 'cell_contents': - def cell_deleter(): - %s - del cell # pylint: disable=unused-variable - func = FunctionType(cell_deleter.__code__, globals(), "", None, (cell,)) # same as cell_deleter, but with cell being the cell's contents - func() - else: - delattr(cell, name)''' % __nonlocal) - - else: - # Likely PyPy 2.7. Simulate the nonlocal keyword with bytecode - # manipulation. - from . import nonlocals as _nonlocals - @_nonlocals.export_nonlocals('cellv') - def _setattr(cell, name, value): - if type(cell) is CellType and name == 'cell_contents': - cellv = None - @_nonlocals.nonlocals('cellv', closure_override=(cell,)) - def cell_setter(value): - cellv = value # pylint: disable=unused-variable - cell_setter(value) - else: - setattr(cell, name, value) - - def _delattr(cell, name): - if type(cell) is CellType and name == 'cell_contents': - pass - else: - delattr(cell, name) - - -_setattr_shim = BuiltinShim('_setattr', setattr) -_delattr_shim = BuiltinShim('_delattr', delattr) - - def _create_weakref(obj, *args): from weakref import ref if obj is None: # it's dead @@ -1450,10 +1329,10 @@ def save_cell(pickler, obj): f = obj.cell_contents except: log.info("Ce3: %s" % obj) - pickler.save_reduce(_create_cell, (_CELL_EMPTY_shim,), obj=obj) + pickler.save_reduce(_create_cell, (shims._CELL_EMPTY,), obj=obj) # Call the function _delattr on the cell's cell_contents attribute # The result of this function call will be None - pickler.save_reduce(_delattr_shim, (obj, 'cell_contents')) + pickler.save_reduce(shims._delattr, (obj, 'cell_contents')) # pop None created by calling _delattr off stack if PY3: pickler.write(bytes('0', 'UTF-8')) @@ -1465,7 +1344,7 @@ def save_cell(pickler, obj): recursive_cells = pickler._recursive_cells.get(id(f)) if recursive_cells is not None: log.info("Ce2: %s" % obj) - pickler.save_reduce(_create_cell, (_CELL_REF_shim,), obj=obj) + pickler.save_reduce(_create_cell, (_CELL_REF,), obj=obj) recursive_cells[1].append(obj) log.info("# Ce2") return diff --git a/dill/nonlocals.py b/dill/_nonlocals.py similarity index 100% rename from dill/nonlocals.py rename to dill/_nonlocals.py diff --git a/dill/detect.py b/dill/detect.py index 59abee3f..3e5768ae 100644 --- a/dill/detect.py +++ b/dill/detect.py @@ -157,7 +157,16 @@ def freevars(func): func = getattr(func, func_code).co_freevars # get freevars else: return {} - return dict((name,c.cell_contents) for (name,c) in zip(func,closures)) + + def get_cell_contents(): + for (name,c) in zip(func,closures): + try: + cell_contents = c.cell_contents + except: + continue + yield (name,c.cell_contents) + + return dict(get_cell_contents()) # thanks to Davies Liu for recursion of globals def nestedglobals(func, recurse=True): @@ -201,9 +210,14 @@ def globalvars(func, recurse=True, builtin=False): # get references from within closure orig_func, func = func, set() for obj in getattr(orig_func, func_closure) or {}: - _vars = globalvars(obj.cell_contents, recurse, builtin) or {} - func.update(_vars) #XXX: (above) be wary of infinte recursion? - globs.update(_vars) + try: + cell_contents = obj.cell_contents + except: + pass + else: + _vars = globalvars(cell_contents, recurse, builtin) or {} + func.update(_vars) #XXX: (above) be wary of infinte recursion? + globs.update(_vars) # get globals globs.update(getattr(orig_func, func_globals) or {}) # get names of references diff --git a/dill/shims.py b/dill/shims.py new file mode 100644 index 00000000..ec4c491c --- /dev/null +++ b/dill/shims.py @@ -0,0 +1,165 @@ +#!/usr/bin/env python +# +# Author: Mike McKerns (mmckerns @caltech and @uqfoundation) +# Copyright (c) 2008-2016 California Institute of Technology. +# Copyright (c) 2016-2021 The Uncertainty Quantification Foundation. +# License: 3-clause BSD. The full license text is available at: +# - https://github.com/uqfoundation/dill/blob/master/LICENSE +""" +Provides shims for compatibility between versions of dill and Python. +""" + +import inspect, sys + +# "Import" all the types and version conditions from dill._dill +globals().update(sys.modules['dill._dill'].__dict__) + +# The values for the shims for this particular version of dill and Python. +_dill_true_values = {} + +class Sentinel(object): + """ + Create a unique sentinel object that is pickled as a constant. + """ + def __init__(self, name, module=None): + self.name = name + if _dill_true_values is not None: + _dill_true_values[name] = self + self.module_name = 'dill._dill' + elif module is None: + # Use the calling function's module + self.module_name = inspect.currentframe().f_back.f_globals['__name__'] + else: + self.module_name = module.__name__ + def __repr__(self): + return self.module_name + '.' + self.name # pragma: no cover + def __copy__(self): + return self # pragma: no cover + def __deepcopy__(self, memo): + return self # pragma: no cover + def __reduce__(self): + return self.name + def __reduce_ex__(self, protocol): + return self.name + +class Shim(object): + """ + Refers to a shim function in dill._dill if it exists and to another + function if it doesn't exist. This choice is made during the unpickle + step instead of the pickling process. + """ + def __new__(cls, name, alternative, module=None): + if callable(alternative): + return object.__new__(_CallableShim) + else: + return object.__new__(cls) + def __init__(self, name, alternative, module=None): + if _dill_true_values is not None: + self.module = sys.modules['dill._dill'] + g = globals() + if name in g: + _dill_true_values[name] = g[name] + elif module is None: + # Use the calling function's module + self.module = sys.modules[inspect.currentframe().f_back.f_globals['__name__']] + else: + self.module = module + self.name = name + self.alternative = alternative + def __copy__(self): + return self # pragma: no cover + def __deepcopy__(self, memo): + return self # pragma: no cover + def __reduce__(self): + return (getattr, (self.module, self.name, self.alternative)) + def __reduce_ex__(self, protocol): + return self.__reduce__() + +class _CallableShim(Shim): + def __call__(self, *args, **kwargs): + return getattr(self.module, self.name, self.alternative)(*args, **kwargs) # pragma: no cover + +# Used to stay compatible with versions of dill whose _create_cell functions +# do not have a default value. +# Can be safely replaced removed entirely (replaced by empty tuples for calls to +# _create_cell) once breaking changes are allowed. +_CELL_EMPTY = Sentinel('_CELL_EMPTY') +_CELL_EMPTY = Shim('_CELL_EMPTY', None) +_dill_true_values['_CELL_REF'] = None + + +if OLD37: + if HAS_CTYPES and hasattr(ctypes, 'pythonapi') and hasattr(ctypes.pythonapi, 'PyCell_Set'): + # CPython + + _PyCell_Set = ctypes.pythonapi.PyCell_Set + + def _setattr(object, name, value): + if type(object) is CellType and name == 'cell_contents': + _PyCell_Set.argtypes = (ctypes.py_object, ctypes.py_object) + _PyCell_Set(object, value) + else: + setattr(object, name, value) + + def _delattr(object, name): + if type(object) is CellType and name == 'cell_contents': + _PyCell_Set.argtypes = (ctypes.py_object, ctypes.c_void_p) + _PyCell_Set(object, None) + else: + delattr(object, name) + + # General Python (not CPython) up to 3.6 is in a weird case, where it is + # possible to pickle recursive cells, but we can't assign directly to the + # cell. + elif PY3: + # Use nonlocal variables to reassign the cell value. + # https://stackoverflow.com/a/59276835 + __nonlocal = ('nonlocal cell',) + exec('''def _setattr(cell, name, value): + if type(cell) is CellType and name == 'cell_contents': + def cell_setter(value): + %s + cell = value # pylint: disable=unused-variable + func = FunctionType(cell_setter.__code__, globals(), "", None, (cell,)) # same as cell_setter, but with cell being the cell's contents + func(value) + else: + setattr(cell, name, value)''' % __nonlocal) + + exec('''def _delattr(cell, name): + if type(cell) is CellType and name == 'cell_contents': + def cell_deleter(): + %s + del cell # pylint: disable=unused-variable + func = FunctionType(cell_deleter.__code__, globals(), "", None, (cell,)) # same as cell_deleter, but with cell being the cell's contents + func() + else: + delattr(cell, name)''' % __nonlocal) + + else: + # Likely PyPy 2.7. Simulate the nonlocal keyword with bytecode + # manipulation. + from . import _nonlocals + @_nonlocals.export_nonlocals('cellv') + def _setattr(cell, name, value): + if type(cell) is CellType and name == 'cell_contents': + cellv = None + @_nonlocals.nonlocals('cellv', closure_override=(cell,)) + def cell_setter(value): + cellv = value # pylint: disable=unused-variable + cell_setter(value) + else: + setattr(cell, name, value) + + def _delattr(cell, name): + if type(cell) is CellType and name == 'cell_contents': + pass + else: + delattr(cell, name) + + +_setattr = Shim('_setattr', setattr) +_delattr = Shim('_delattr', delattr) + +# Update dill._dill with shim functions +sys.modules['dill._dill'].__dict__.update(_dill_true_values) +_dill_true_values = None diff --git a/tests/test_functions.py b/tests/test_functions.py index fed9d55e..cf0f9720 100644 --- a/tests/test_functions.py +++ b/tests/test_functions.py @@ -7,7 +7,6 @@ import dill import sys -import platform dill.settings['recurse'] = True @@ -37,12 +36,10 @@ def function_e(e, *e1, e2=1, e3=2): return e + sum(e1) + e2 + e3''') -# https://stackoverflow.com/a/45661180 -if is_py3(): - exec('''def make_empty_cell(): +def make_empty_cell(): if False: - del value - return (lambda: value).__closure__[0]''') + value = None + return (lambda: value) def test_functions(): @@ -61,11 +58,18 @@ def test_functions(): assert dill.loads(dumped_func_d)(1, 2, 3) == 6 assert dill.loads(dumped_func_d)(1, 2, d2=3) == 6 - if is_py3(): - empty_cell = make_empty_cell() - cell_copy = dill.loads(dill.dumps(empty_cell)) - assert 'empty' in str(cell_copy) + empty_cell = make_empty_cell() + cell_copy = dill.loads(dill.dumps(empty_cell)) + assert 'empty' in str(cell_copy.__closure__[0]) + try: + cell_copy() + except: + # this is good + pass + else: + raise AssertionError('cell_copy() did not read an empty cell') + if is_py3(): exec(''' dumped_func_e = dill.dumps(function_e) assert dill.loads(dumped_func_e)(1, 2) == 6 @@ -74,11 +78,6 @@ def test_functions(): assert dill.loads(dumped_func_e)(1, 2, e2=3, e3=4) == 10 assert dill.loads(dumped_func_e)(1, 2, 3, e2=4) == 12 assert dill.loads(dumped_func_e)(1, 2, 3, e2=4, e3=5) == 15''') - else: - empty_cell = dill._dill._create_cell(dill._dill._CELL_EMPTY) - dill._dill._delattr(empty_cell, 'cell_contents') - cell_copy = dill.loads(dill.dumps(empty_cell)) - assert 'empty' in str(cell_copy) if __name__ == '__main__': test_functions() diff --git a/tests/test_recursive.py b/tests/test_recursive.py index 8287cba3..ecd4536f 100644 --- a/tests/test_recursive.py +++ b/tests/test_recursive.py @@ -6,7 +6,7 @@ # - https://github.com/uqfoundation/dill/blob/master/LICENSE import dill -from dill._dill import PY3, IS_PYPY2 +from dill._dill import PY3 from functools import partial class obj1(object): From dfdfc403fdea804952a75bba3d13d0b085f0b57f Mon Sep 17 00:00:00 2001 From: anivegesana Date: Mon, 27 Dec 2021 20:39:04 -0500 Subject: [PATCH 37/46] Was probably not a good idea They can probably implement it better for their own use-case or it can be added later. --- dill/_dill.py | 27 +++++++++++++--- dill/{shims.py => _shims.py} | 61 +++++++++++------------------------- 2 files changed, 41 insertions(+), 47 deletions(-) rename dill/{shims.py => _shims.py} (66%) diff --git a/dill/_dill.py b/dill/_dill.py index 4e3d2909..18a7b96a 100644 --- a/dill/_dill.py +++ b/dill/_dill.py @@ -259,7 +259,26 @@ def get_file_type(*args, **kwargs): try: ExitType = type(exit) # apparently 'exit' can be removed except NameError: ExitType = None singletontypes = [] -from . import shims + +### Shims for different versions of Python and dill +class Sentinel(object): + """ + Create a unique sentinel object that is pickled as a constant. + """ + def __init__(self, name, module=None): + self.name = name + def __repr__(self): + return __name__ + '.' + self.name # pragma: no cover + def __copy__(self): + return self # pragma: no cover + def __deepcopy__(self, memo): + return self # pragma: no cover + def __reduce__(self): + return self.name + def __reduce_ex__(self, protocol): + return self.name + +from . import _shims ### File modes #: Pickles the file handle, preserving mode. The position of the unpickled @@ -481,7 +500,7 @@ def _exit_recursive_cell_stack(pickler, obj, is_pickler_dill=None): i, recursive_cells = pickler._recursive_cells.pop(id(obj)) # assert i == len(pickler._recursive_cells), 'Stack tampered!' for t in recursive_cells: - pickler.save_reduce(shims._setattr, (t, 'cell_contents', obj)) + pickler.save_reduce(_shims._setattr, (t, 'cell_contents', obj)) # pop None created by _setattr off stack if PY3: pickler.write(bytes('0', 'UTF-8')) @@ -1329,10 +1348,10 @@ def save_cell(pickler, obj): f = obj.cell_contents except: log.info("Ce3: %s" % obj) - pickler.save_reduce(_create_cell, (shims._CELL_EMPTY,), obj=obj) + pickler.save_reduce(_create_cell, (_shims._CELL_EMPTY,), obj=obj) # Call the function _delattr on the cell's cell_contents attribute # The result of this function call will be None - pickler.save_reduce(shims._delattr, (obj, 'cell_contents')) + pickler.save_reduce(_shims._delattr, (obj, 'cell_contents')) # pop None created by calling _delattr off stack if PY3: pickler.write(bytes('0', 'UTF-8')) diff --git a/dill/shims.py b/dill/_shims.py similarity index 66% rename from dill/shims.py rename to dill/_shims.py index ec4c491c..ec6e8091 100644 --- a/dill/shims.py +++ b/dill/_shims.py @@ -11,42 +11,16 @@ import inspect, sys -# "Import" all the types and version conditions from dill._dill -globals().update(sys.modules['dill._dill'].__dict__) +_dill = sys.modules['dill._dill'] # The values for the shims for this particular version of dill and Python. _dill_true_values = {} -class Sentinel(object): - """ - Create a unique sentinel object that is pickled as a constant. - """ - def __init__(self, name, module=None): - self.name = name - if _dill_true_values is not None: - _dill_true_values[name] = self - self.module_name = 'dill._dill' - elif module is None: - # Use the calling function's module - self.module_name = inspect.currentframe().f_back.f_globals['__name__'] - else: - self.module_name = module.__name__ - def __repr__(self): - return self.module_name + '.' + self.name # pragma: no cover - def __copy__(self): - return self # pragma: no cover - def __deepcopy__(self, memo): - return self # pragma: no cover - def __reduce__(self): - return self.name - def __reduce_ex__(self, protocol): - return self.name - class Shim(object): """ - Refers to a shim function in dill._dill if it exists and to another - function if it doesn't exist. This choice is made during the unpickle - step instead of the pickling process. + A wrapper object that refers to a shim object in the module if it exists and + to another object if it doesn't exist. This choice is made during the + unpickle step instead of the pickling process. """ def __new__(cls, name, alternative, module=None): if callable(alternative): @@ -55,7 +29,7 @@ def __new__(cls, name, alternative, module=None): return object.__new__(cls) def __init__(self, name, alternative, module=None): if _dill_true_values is not None: - self.module = sys.modules['dill._dill'] + self.module = _dill g = globals() if name in g: _dill_true_values[name] = g[name] @@ -83,26 +57,27 @@ def __call__(self, *args, **kwargs): # do not have a default value. # Can be safely replaced removed entirely (replaced by empty tuples for calls to # _create_cell) once breaking changes are allowed. -_CELL_EMPTY = Sentinel('_CELL_EMPTY') +_CELL_EMPTY = _dill.Sentinel('_CELL_EMPTY') _CELL_EMPTY = Shim('_CELL_EMPTY', None) _dill_true_values['_CELL_REF'] = None -if OLD37: - if HAS_CTYPES and hasattr(ctypes, 'pythonapi') and hasattr(ctypes.pythonapi, 'PyCell_Set'): +if _dill.OLD37: + if _dill.HAS_CTYPES and hasattr(_dill.ctypes, 'pythonapi') and hasattr(_dill.ctypes.pythonapi, 'PyCell_Set'): # CPython + ctypes = _dill.ctypes _PyCell_Set = ctypes.pythonapi.PyCell_Set def _setattr(object, name, value): - if type(object) is CellType and name == 'cell_contents': + if type(object) is _dill.CellType and name == 'cell_contents': _PyCell_Set.argtypes = (ctypes.py_object, ctypes.py_object) _PyCell_Set(object, value) else: setattr(object, name, value) def _delattr(object, name): - if type(object) is CellType and name == 'cell_contents': + if type(object) is _dill.CellType and name == 'cell_contents': _PyCell_Set.argtypes = (ctypes.py_object, ctypes.c_void_p) _PyCell_Set(object, None) else: @@ -111,26 +86,26 @@ def _delattr(object, name): # General Python (not CPython) up to 3.6 is in a weird case, where it is # possible to pickle recursive cells, but we can't assign directly to the # cell. - elif PY3: + elif _dill.PY3: # Use nonlocal variables to reassign the cell value. # https://stackoverflow.com/a/59276835 __nonlocal = ('nonlocal cell',) exec('''def _setattr(cell, name, value): - if type(cell) is CellType and name == 'cell_contents': + if type(cell) is _dill.CellType and name == 'cell_contents': def cell_setter(value): %s cell = value # pylint: disable=unused-variable - func = FunctionType(cell_setter.__code__, globals(), "", None, (cell,)) # same as cell_setter, but with cell being the cell's contents + func = _dill.FunctionType(cell_setter.__code__, globals(), "", None, (cell,)) # same as cell_setter, but with cell being the cell's contents func(value) else: setattr(cell, name, value)''' % __nonlocal) exec('''def _delattr(cell, name): - if type(cell) is CellType and name == 'cell_contents': + if type(cell) is _dill.CellType and name == 'cell_contents': def cell_deleter(): %s del cell # pylint: disable=unused-variable - func = FunctionType(cell_deleter.__code__, globals(), "", None, (cell,)) # same as cell_deleter, but with cell being the cell's contents + func = _dill.FunctionType(cell_deleter.__code__, globals(), "", None, (cell,)) # same as cell_deleter, but with cell being the cell's contents func() else: delattr(cell, name)''' % __nonlocal) @@ -141,7 +116,7 @@ def cell_deleter(): from . import _nonlocals @_nonlocals.export_nonlocals('cellv') def _setattr(cell, name, value): - if type(cell) is CellType and name == 'cell_contents': + if type(cell) is _dill.CellType and name == 'cell_contents': cellv = None @_nonlocals.nonlocals('cellv', closure_override=(cell,)) def cell_setter(value): @@ -151,7 +126,7 @@ def cell_setter(value): setattr(cell, name, value) def _delattr(cell, name): - if type(cell) is CellType and name == 'cell_contents': + if type(cell) is _dill.CellType and name == 'cell_contents': pass else: delattr(cell, name) From 8692e5d82dbca32dbf2da460decbad387eacd8ed Mon Sep 17 00:00:00 2001 From: anivegesana Date: Tue, 28 Dec 2021 22:46:36 -0500 Subject: [PATCH 38/46] Split part of Shim into GetAttrShim --- dill/_dill.py | 18 ++++- dill/_nonlocals.py | 27 +++++++- dill/_shims.py | 150 ++++++++++++++++++++++++++++++---------- tests/test_functions.py | 4 +- 4 files changed, 156 insertions(+), 43 deletions(-) diff --git a/dill/_dill.py b/dill/_dill.py index 18a7b96a..af400c29 100644 --- a/dill/_dill.py +++ b/dill/_dill.py @@ -265,7 +265,7 @@ class Sentinel(object): """ Create a unique sentinel object that is pickled as a constant. """ - def __init__(self, name, module=None): + def __init__(self, name): self.name = name def __repr__(self): return __name__ + '.' + self.name # pragma: no cover @@ -491,6 +491,7 @@ def _enter_recursive_cell_stack(pickler, obj, is_pickler_dill=None): l = [] pickler._recursive_cells[id(obj)] = (len(pickler._recursive_cells), l) return l + def _exit_recursive_cell_stack(pickler, obj, is_pickler_dill=None): if is_pickler_dill is None: is_pickler_dill = is_dill(pickler, child=True) @@ -927,6 +928,9 @@ def _create_cell(contents=None): else: def _create_cell(contents=None): + # _CELL_EMPTY is a sentinel object defined in _shims.py for specifying + # that a cell is empty. Is only needed in PyPy 2.7, but is included for + # compatibility across versions of Python. if contents is not _CELL_EMPTY: value = contents return (lambda: value).func_closure[0] @@ -1348,6 +1352,14 @@ def save_cell(pickler, obj): f = obj.cell_contents except: log.info("Ce3: %s" % obj) + # _shims._CELL_EMPTY is defined in _shims.py to support PyPy 2.7. + # It unpickles to a sentinel object _dill._CELL_EMPTY, also created in + # _shims.py. This object is not present in Python 3 because the cell's + # contents can be deleted in newer versions of Python. The shim object + # will instead unpickle to None if unpickled in Python 3. + + # When breaking changes are made to dill, (_shims._CELL_EMPTY,) can + # be replaced by () pickler.save_reduce(_create_cell, (_shims._CELL_EMPTY,), obj=obj) # Call the function _delattr on the cell's cell_contents attribute # The result of this function call will be None @@ -1363,6 +1375,9 @@ def save_cell(pickler, obj): recursive_cells = pickler._recursive_cells.get(id(f)) if recursive_cells is not None: log.info("Ce2: %s" % obj) + # _CELL_REF is defined in _shims.py to support older versions of + # dill. When breaking changes are made to dill, (_CELL_REF,) can + # be replaced by () pickler.save_reduce(_create_cell, (_CELL_REF,), obj=obj) recursive_cells[1].append(obj) log.info("# Ce2") @@ -1664,7 +1679,6 @@ def save_function(pickler, obj): #NOTE: workaround for #234; "partial" still is problematic for recurse if OLDER and not _byref and (_super or (not _super and _memo) or (not _super and not _memo and _recurse)): pickler.clear_memo() #if _memo: - # stack.remove(id(obj)) # #pickler.clear_memo() # #StockPickler.clear_memo(pickler) log.info("# F1") diff --git a/dill/_nonlocals.py b/dill/_nonlocals.py index 24150b9f..12b32f03 100644 --- a/dill/_nonlocals.py +++ b/dill/_nonlocals.py @@ -1,5 +1,28 @@ -# This file is only needed for PyPy2. It can be removed when support is dropped. -# https://code.activestate.com/recipes/578965-python-2-nonlocal/ +#!/usr/bin/env python +# +# Copyright (c) 2014 Ryan Gonzalez +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. +""" +This file is only needed for PyPy2. It can be removed when support is dropped. +https://code.activestate.com/recipes/578965-python-2-nonlocal/ +""" import inspect, types, dis diff --git a/dill/_shims.py b/dill/_shims.py index ec6e8091..722b6744 100644 --- a/dill/_shims.py +++ b/dill/_shims.py @@ -1,65 +1,139 @@ #!/usr/bin/env python # # Author: Mike McKerns (mmckerns @caltech and @uqfoundation) -# Copyright (c) 2008-2016 California Institute of Technology. -# Copyright (c) 2016-2021 The Uncertainty Quantification Foundation. +# Author: Anirudh Vegesana (avegesan@stanford.edu) +# Copyright (c) 2021 The Uncertainty Quantification Foundation. # License: 3-clause BSD. The full license text is available at: # - https://github.com/uqfoundation/dill/blob/master/LICENSE """ Provides shims for compatibility between versions of dill and Python. + +Compatibility shims should be provided in this file. Here are two simple example +use cases. + +Deprecation of constructor function: +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Assume that we were transitioning _import_module in _dill.py to +the builtin function importlib.import_module when present. + +@_assign_to_dill_module +def _import_module(import_name): + ... # code already in _dill.py + +_import_module = GetAttrShim(importlib, 'import_module', GetAttrShim(_dill, '_import_module', None)) + +The code will attempt to find import_module in the importlib module. If not +present, it will use the _import_module function in _dill. + +Emulate new Python behavior in older Python versions: +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +CellType.cell_contents behaves differently in Python 3.6 and 3.7. It is +read-only in Python 3.6 and writable and deletable in 3.7. + +if _dill.OLD37 and _dill.HAS_CTYPES and ...: + @_assign_to_dill_module + def _setattr(object, name, value): + if type(object) is _dill.CellType and name == 'cell_contents': + _PyCell_Set.argtypes = (ctypes.py_object, ctypes.py_object) + _PyCell_Set(object, value) + else: + setattr(object, name, value) +... # more cases below + +_setattr = GetAttrShim(_dill, '_setattr', setattr) + +_dill._setattr will be used when present to emulate Python 3.7 functionality in +older versions of Python while defaulting to the standard setattr in 3.7+. + +See this PR for the discussion that lead to this system: +https://github.com/uqfoundation/dill/pull/443 """ import inspect, sys _dill = sys.modules['dill._dill'] -# The values for the shims for this particular version of dill and Python. -_dill_true_values = {} class Shim(object): """ - A wrapper object that refers to a shim object in the module if it exists and - to another object if it doesn't exist. This choice is made during the - unpickle step instead of the pickling process. + Shim objects are wrappers used for compatibility enforcement during + unpickle-time. They should only be used in calls to pickler.save_reduce and + other Shim objects. They are only evaluated within unpickler.load. """ - def __new__(cls, name, alternative, module=None): - if callable(alternative): - return object.__new__(_CallableShim) + def __new__(cls, is_callable=False): + if is_callable: + if not hasattr(cls, '_Callable'): + cls._Callable = type('_Callable', (_CallableShimMixin, cls), {}) + return object.__new__(cls._Callable) else: return object.__new__(cls) - def __init__(self, name, alternative, module=None): - if _dill_true_values is not None: - self.module = _dill - g = globals() - if name in g: - _dill_true_values[name] = g[name] - elif module is None: - # Use the calling function's module - self.module = sys.modules[inspect.currentframe().f_back.f_globals['__name__']] - else: - self.module = module - self.name = name - self.alternative = alternative + def __init__(self, reduction): + super(Shim, self).__init__() + self.reduction = reduction def __copy__(self): return self # pragma: no cover def __deepcopy__(self, memo): return self # pragma: no cover def __reduce__(self): - return (getattr, (self.module, self.name, self.alternative)) + return self.reduction def __reduce_ex__(self, protocol): return self.__reduce__() -class _CallableShim(Shim): +class _CallableShimMixin(object): + # A version of Shim for functions. Used to trick pickler.save_reduce into + # thinking that Shim objects of functions are themselves meaningful functions. def __call__(self, *args, **kwargs): - return getattr(self.module, self.name, self.alternative)(*args, **kwargs) # pragma: no cover + reduction = self.__reduce__() + func = reduction[0] + args = reduction[1] + obj = func(args) + return obj(*args, **kwargs) + +class GetAttrShim(Shim): + """ + A Shim object that represents the getattr operation. When unpickled, the + GetAttrShim will access an attribute 'name' of 'object' and return the value + stored there. If the attribute doesn't exist, the default value will be + returned if present. + """ + NO_DEFAULT = _dill.Sentinel('_shims.GetAttrShim.NO_DEFAULT') + def __new__(cls, object, name, default=NO_DEFAULT): + return Shim.__new__(cls, is_callable=callable(default)) + def __init__(self, object, name, default=NO_DEFAULT): + if object is None: + # Use the calling function's module + object = sys.modules[inspect.currentframe().f_back.f_globals['__name__']] + + if default is GetAttrShim.NO_DEFAULT: + reduction = (getattr, (object, name)) + else: + reduction = (getattr, (object, name, default)) + + super(GetAttrShim, self).__init__(reduction) + + self.object = object + self.name = name + self.default = default + @classmethod + def _callable(cls, object, name, default=NO_DEFAULT): + return callable(default) + +def _assign_to_dill_module(func): + _dill.__dict__[func.__name__] = func + return func + +###################### +## Compatibility Shims are defined below +###################### # Used to stay compatible with versions of dill whose _create_cell functions # do not have a default value. # Can be safely replaced removed entirely (replaced by empty tuples for calls to # _create_cell) once breaking changes are allowed. -_CELL_EMPTY = _dill.Sentinel('_CELL_EMPTY') -_CELL_EMPTY = Shim('_CELL_EMPTY', None) -_dill_true_values['_CELL_REF'] = None +if _dill.HAS_CTYPES and not _dill.PY3: + _dill._CELL_EMPTY = _dill.Sentinel('_CELL_EMPTY') +_CELL_EMPTY = GetAttrShim(_dill, '_CELL_EMPTY', None) +_dill._CELL_REF = None if _dill.OLD37: @@ -69,6 +143,7 @@ def __call__(self, *args, **kwargs): _PyCell_Set = ctypes.pythonapi.PyCell_Set + @_assign_to_dill_module def _setattr(object, name, value): if type(object) is _dill.CellType and name == 'cell_contents': _PyCell_Set.argtypes = (ctypes.py_object, ctypes.py_object) @@ -76,6 +151,7 @@ def _setattr(object, name, value): else: setattr(object, name, value) + @_assign_to_dill_module def _delattr(object, name): if type(object) is _dill.CellType and name == 'cell_contents': _PyCell_Set.argtypes = (ctypes.py_object, ctypes.c_void_p) @@ -90,7 +166,8 @@ def _delattr(object, name): # Use nonlocal variables to reassign the cell value. # https://stackoverflow.com/a/59276835 __nonlocal = ('nonlocal cell',) - exec('''def _setattr(cell, name, value): + exec('''@_assign_to_dill_module + def _setattr(cell, name, value): if type(cell) is _dill.CellType and name == 'cell_contents': def cell_setter(value): %s @@ -100,7 +177,8 @@ def cell_setter(value): else: setattr(cell, name, value)''' % __nonlocal) - exec('''def _delattr(cell, name): + exec('''@_assign_to_dill_module + def _delattr(cell, name): if type(cell) is _dill.CellType and name == 'cell_contents': def cell_deleter(): %s @@ -114,6 +192,7 @@ def cell_deleter(): # Likely PyPy 2.7. Simulate the nonlocal keyword with bytecode # manipulation. from . import _nonlocals + @_assign_to_dill_module @_nonlocals.export_nonlocals('cellv') def _setattr(cell, name, value): if type(cell) is _dill.CellType and name == 'cell_contents': @@ -125,6 +204,7 @@ def cell_setter(value): else: setattr(cell, name, value) + @_assign_to_dill_module def _delattr(cell, name): if type(cell) is _dill.CellType and name == 'cell_contents': pass @@ -132,9 +212,5 @@ def _delattr(cell, name): delattr(cell, name) -_setattr = Shim('_setattr', setattr) -_delattr = Shim('_delattr', delattr) - -# Update dill._dill with shim functions -sys.modules['dill._dill'].__dict__.update(_dill_true_values) -_dill_true_values = None +_setattr = GetAttrShim(_dill, '_setattr', setattr) +_delattr = GetAttrShim(_dill, '_delattr', delattr) diff --git a/tests/test_functions.py b/tests/test_functions.py index cf0f9720..48d62f5e 100644 --- a/tests/test_functions.py +++ b/tests/test_functions.py @@ -36,7 +36,7 @@ def function_e(e, *e1, e2=1, e3=2): return e + sum(e1) + e2 + e3''') -def make_empty_cell(): +def function_with_unassigned_variable(): if False: value = None return (lambda: value) @@ -58,7 +58,7 @@ def test_functions(): assert dill.loads(dumped_func_d)(1, 2, 3) == 6 assert dill.loads(dumped_func_d)(1, 2, d2=3) == 6 - empty_cell = make_empty_cell() + empty_cell = function_with_unassigned_variable() cell_copy = dill.loads(dill.dumps(empty_cell)) assert 'empty' in str(cell_copy.__closure__[0]) try: From 980241ae633a42c87bce91dcfde46a5250e82df8 Mon Sep 17 00:00:00 2001 From: anivegesana Date: Tue, 28 Dec 2021 22:50:10 -0500 Subject: [PATCH 39/46] Strange issue with exec in PyPy3.6 --- dill/_shims.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/dill/_shims.py b/dill/_shims.py index 722b6744..310f58c2 100644 --- a/dill/_shims.py +++ b/dill/_shims.py @@ -85,8 +85,8 @@ class _CallableShimMixin(object): def __call__(self, *args, **kwargs): reduction = self.__reduce__() func = reduction[0] - args = reduction[1] - obj = func(args) + f_args = reduction[1] + obj = func(*f_args) return obj(*args, **kwargs) class GetAttrShim(Shim): @@ -166,8 +166,7 @@ def _delattr(object, name): # Use nonlocal variables to reassign the cell value. # https://stackoverflow.com/a/59276835 __nonlocal = ('nonlocal cell',) - exec('''@_assign_to_dill_module - def _setattr(cell, name, value): + exec('''def _setattr(cell, name, value): if type(cell) is _dill.CellType and name == 'cell_contents': def cell_setter(value): %s @@ -176,9 +175,9 @@ def cell_setter(value): func(value) else: setattr(cell, name, value)''' % __nonlocal) + _assign_to_dill_module(_setattr) - exec('''@_assign_to_dill_module - def _delattr(cell, name): + exec('''def _delattr(cell, name): if type(cell) is _dill.CellType and name == 'cell_contents': def cell_deleter(): %s @@ -187,6 +186,7 @@ def cell_deleter(): func() else: delattr(cell, name)''' % __nonlocal) + _assign_to_dill_module(_delattr) else: # Likely PyPy 2.7. Simulate the nonlocal keyword with bytecode From 7d41c799034a7a52e638b5c1779db983479c1119 Mon Sep 17 00:00:00 2001 From: anivegesana Date: Wed, 29 Dec 2021 21:28:58 -0500 Subject: [PATCH 40/46] Better _shims.py --- dill/_dill.py | 24 ++++-- dill/_nonlocals.py | 145 ------------------------------------ dill/_shims.py | 179 ++++++++++++++++++++++++++++----------------- 3 files changed, 129 insertions(+), 219 deletions(-) delete mode 100644 dill/_nonlocals.py diff --git a/dill/_dill.py b/dill/_dill.py index af400c29..a679a7fb 100644 --- a/dill/_dill.py +++ b/dill/_dill.py @@ -260,15 +260,22 @@ def get_file_type(*args, **kwargs): except NameError: ExitType = None singletontypes = [] +import inspect + ### Shims for different versions of Python and dill class Sentinel(object): """ Create a unique sentinel object that is pickled as a constant. """ - def __init__(self, name): + def __init__(self, name, module_name=None): self.name = name + if module_name is None: + # Use the calling frame's module + self.__module__ = inspect.currentframe().f_back.f_globals['__name__'] + else: + self.__module__ = module_name def __repr__(self): - return __name__ + '.' + self.name # pragma: no cover + return self.__module__ + '.' + self.name # pragma: no cover def __copy__(self): return self # pragma: no cover def __deepcopy__(self, memo): @@ -922,15 +929,20 @@ def __getattribute__(self, attr): attrs[index] = ".".join([attrs[index], attr]) return type(self)(attrs, index) +# _CELL_REF and _CELL_EMPTY are used to stay compatible with versions of dill +# whose _create_cell functions do not have a default value. +# Can be safely replaced removed entirely (replaced by empty tuples for calls to +# _create_cell) once breaking changes are allowed. +_CELL_REF = None + if PY3: def _create_cell(contents=None): return (lambda: contents).__closure__[0] else: + _CELL_EMPTY = Sentinel('_CELL_EMPTY') + def _create_cell(contents=None): - # _CELL_EMPTY is a sentinel object defined in _shims.py for specifying - # that a cell is empty. Is only needed in PyPy 2.7, but is included for - # compatibility across versions of Python. if contents is not _CELL_EMPTY: value = contents return (lambda: value).func_closure[0] @@ -1355,7 +1367,7 @@ def save_cell(pickler, obj): # _shims._CELL_EMPTY is defined in _shims.py to support PyPy 2.7. # It unpickles to a sentinel object _dill._CELL_EMPTY, also created in # _shims.py. This object is not present in Python 3 because the cell's - # contents can be deleted in newer versions of Python. The shim object + # contents can be deleted in newer versions of Python. The reduce object # will instead unpickle to None if unpickled in Python 3. # When breaking changes are made to dill, (_shims._CELL_EMPTY,) can diff --git a/dill/_nonlocals.py b/dill/_nonlocals.py deleted file mode 100644 index 12b32f03..00000000 --- a/dill/_nonlocals.py +++ /dev/null @@ -1,145 +0,0 @@ -#!/usr/bin/env python -# -# Copyright (c) 2014 Ryan Gonzalez -# -# Permission is hereby granted, free of charge, to any person obtaining a copy -# of this software and associated documentation files (the "Software"), to deal -# in the Software without restriction, including without limitation the rights -# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -# copies of the Software, and to permit persons to whom the Software is -# furnished to do so, subject to the following conditions: -# -# The above copyright notice and this permission notice shall be included in all -# copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -# SOFTWARE. -""" -This file is only needed for PyPy2. It can be removed when support is dropped. -https://code.activestate.com/recipes/578965-python-2-nonlocal/ -""" - -import inspect, types, dis - -__all__ = ['export_nonlocals', 'nonlocals'] - -# http://www.jonathon-vogel.com/posts/patching_function_bytecode_with_python/ -def find_code(code, f): - i = 0 - while i < len(code): - if f(code, i): - return i - elif code[i] < dis.HAVE_ARGUMENT: - i += 1 - else: - i += 3 - -# http://nedbatchelder.com/blog/201301/byterun_and_making_cells.html -def make_cell(value): - return (lambda x: lambda: x)(value).func_closure[0] - -globals().update(dis.opmap) - -def export_nonlocals(*vars): - def func(f): - code = map(ord, f.func_code.co_code) - varnames = list(f.func_code.co_varnames) - names = list(f.func_code.co_names) - cf=lambda c,i:c[i] in (LOAD_FAST,STORE_FAST) and varnames[c[i+1]] in vars - while True: - idx = find_code(code, cf) - if idx is None: - break - code[idx] = LOAD_NAME if code[idx] == LOAD_FAST else STORE_NAME - var = varnames[code[idx+1]] - code[idx+1] = len(names) - try: - code[idx+1] = names.index(var) - except ValueError: - names.append(var) - for i, var in enumerate(filter(varnames.__contains__, names)): - varnames[varnames.index(var)] = '__anon_var_%d' % i - rescode = types.CodeType(f.func_code.co_argcount, f.func_code.co_nlocals, - f.func_code.co_stacksize, - f.func_code.co_flags^0x01, - ''.join(map(chr, code)), f.func_code.co_consts, - tuple(names), tuple(varnames), - f.func_code.co_filename, f.func_code.co_name, - f.func_code.co_firstlineno, - f.func_code.co_lnotab, f.func_code.co_freevars, - f.func_code.co_cellvars) - return types.FunctionType(rescode, dict(f.func_globals, __ns=True), - f.func_name, f.func_defaults, f.func_closure) - return func - -def nonlocals(*vars, **kwargs): - def func(f): - caller = inspect.stack()[1][0] - caller_vars = caller.f_globals - caller_vars.update(caller.f_locals) - code = map(ord, f.func_code.co_code) - varmap = {} - freevars = list(f.func_code.co_freevars) - freec = len(freevars) - freeoffs = len(f.func_code.co_cellvars) - varnames = list(f.func_code.co_varnames) - closure = list(f.func_closure or []) - names = list(f.func_code.co_names) - consts = list(f.func_code.co_consts) - fglobals = {'__nonlocal_plocals': caller.f_locals} - names.extend(fglobals.keys()) - plocals_pos = len(names)-1 - offs = 0 - closure_override = kwargs.get('closure_override', None) - def cf(c, i): - if c[i] in (LOAD_FAST, STORE_FAST) and varnames[c[i+1]] in vars: - return True - elif c[i] in dis.hasjabs: - c[i+1] += offs - while True: - idx = find_code(code, cf) - if idx is None: - break - code[idx] = LOAD_DEREF if code[idx] == LOAD_FAST else STORE_DEREF - var = varnames[code[idx+1]] - code[idx+1] = len(freevars) - try: - code[idx+1] = freevars.index(var) - except ValueError: - freevars.append(var) - code[idx+1] += freeoffs - if code[idx] == STORE_DEREF and caller_vars.get('__ns') == True: - const_id = len(consts) - try: - const_id = consts.index(var) - except ValueError: - consts.append(var) - code.insert(idx, DUP_TOP) - code[idx+4:idx+4] = [ - LOAD_GLOBAL, plocals_pos, 0, - LOAD_CONST, const_id, 0, - STORE_SUBSCR - ] - offs += 4 - nlocals = len(freevars)-freec+f.func_code.co_nlocals - if closure_override is None: - closure.extend(map(make_cell, map(caller_vars.__getitem__, - freevars[freec:]))) - else: - closure.extend(closure_override) - rescode = types.CodeType(f.func_code.co_argcount, nlocals, - f.func_code.co_stacksize, f.func_code.co_flags, - ''.join(map(chr, code)), tuple(consts), - tuple(names), tuple(varnames), - f.func_code.co_filename, f.func_code.co_name, - f.func_code.co_firstlineno, - f.func_code.co_lnotab, tuple(freevars), - f.func_code.co_cellvars) - return types.FunctionType(rescode, dict(f.func_globals, **fglobals), - f.func_name, f.func_defaults, tuple(closure)) - return func diff --git a/dill/_shims.py b/dill/_shims.py index 310f58c2..89ef159f 100644 --- a/dill/_shims.py +++ b/dill/_shims.py @@ -16,11 +16,11 @@ Assume that we were transitioning _import_module in _dill.py to the builtin function importlib.import_module when present. -@_assign_to_dill_module +@assign_to(_dill) def _import_module(import_name): ... # code already in _dill.py -_import_module = GetAttrShim(importlib, 'import_module', GetAttrShim(_dill, '_import_module', None)) +_import_module = Getattr(importlib, 'import_module', Getattr(_dill, '_import_module', None)) The code will attempt to find import_module in the importlib module. If not present, it will use the _import_module function in _dill. @@ -31,7 +31,7 @@ def _import_module(import_name): read-only in Python 3.6 and writable and deletable in 3.7. if _dill.OLD37 and _dill.HAS_CTYPES and ...: - @_assign_to_dill_module + @assign_to(_dill) def _setattr(object, name, value): if type(object) is _dill.CellType and name == 'cell_contents': _PyCell_Set.argtypes = (ctypes.py_object, ctypes.py_object) @@ -40,7 +40,7 @@ def _setattr(object, name, value): setattr(object, name, value) ... # more cases below -_setattr = GetAttrShim(_dill, '_setattr', setattr) +_setattr = Getattr(_dill, '_setattr', setattr) _dill._setattr will be used when present to emulate Python 3.7 functionality in older versions of Python while defaulting to the standard setattr in 3.7+. @@ -54,22 +54,35 @@ def _setattr(object, name, value): _dill = sys.modules['dill._dill'] -class Shim(object): +class Reduce(object): """ - Shim objects are wrappers used for compatibility enforcement during - unpickle-time. They should only be used in calls to pickler.save_reduce and - other Shim objects. They are only evaluated within unpickler.load. + Reduce objects are wrappers used for compatibility enforcement during + unpickle-time. They should only be used in calls to pickler.save and + other Reduce objects. They are only evaluated within unpickler.load. + + Pickling a Reduce object makes the two implementations equivalent: + + pickler.save(Reduce(*reduction)) + + pickler.save_reduce(*reduction, obj=reduction) """ - def __new__(cls, is_callable=False): + def __new__(cls, *reduction, **kwargs): + """ + Args: + *reduction: a tuple that matches the format given here: + https://docs.python.org/3/library/pickle.html#object.__reduce__ + is_callable: a bool to indicate that the object created by + unpickling `reduction` is callable. If true, the current Reduce + is allowed to be used as the function in further save_reduce calls + or Reduce objects. + """ + is_callable = kwargs.get('is_callable', False) # Pleases Py2. Can be removed later if is_callable: - if not hasattr(cls, '_Callable'): - cls._Callable = type('_Callable', (_CallableShimMixin, cls), {}) - return object.__new__(cls._Callable) + self = object.__new__(_CallableReduce) else: - return object.__new__(cls) - def __init__(self, reduction): - super(Shim, self).__init__() + self = object.__new__(Reduce) self.reduction = reduction + return self def __copy__(self): return self # pragma: no cover def __deepcopy__(self, memo): @@ -79,9 +92,9 @@ def __reduce__(self): def __reduce_ex__(self, protocol): return self.__reduce__() -class _CallableShimMixin(object): - # A version of Shim for functions. Used to trick pickler.save_reduce into - # thinking that Shim objects of functions are themselves meaningful functions. +class _CallableReduce(Reduce): + # A version of Reduce for functions. Used to trick pickler.save_reduce into + # thinking that Reduce objects of functions are themselves meaningful functions. def __call__(self, *args, **kwargs): reduction = self.__reduce__() func = reduction[0] @@ -89,52 +102,56 @@ def __call__(self, *args, **kwargs): obj = func(*f_args) return obj(*args, **kwargs) -class GetAttrShim(Shim): +__NO_DEFAULT = _dill.Sentinel('Getattr.NO_DEFAULT') + +def Getattr(object, name, default=__NO_DEFAULT): """ - A Shim object that represents the getattr operation. When unpickled, the - GetAttrShim will access an attribute 'name' of 'object' and return the value + A Reduce object that represents the getattr operation. When unpickled, the + Getattr will access an attribute 'name' of 'object' and return the value stored there. If the attribute doesn't exist, the default value will be returned if present. + + The following statements are equivalent: + + Getattr(collections, 'OrderedDict') + Getattr(collections, 'spam', None) + Getattr(*args) + + Reduce(getattr, (collections, 'OrderedDict')) + Reduce(getattr, (collections, 'spam', None)) + Reduce(getattr, args) + + During unpickling, the first two will result in collections.OrderedDict and + None respectively because the first attribute exists and the second one does + not, forcing it to use the default value given in the third argument. """ - NO_DEFAULT = _dill.Sentinel('_shims.GetAttrShim.NO_DEFAULT') - def __new__(cls, object, name, default=NO_DEFAULT): - return Shim.__new__(cls, is_callable=callable(default)) - def __init__(self, object, name, default=NO_DEFAULT): - if object is None: - # Use the calling function's module - object = sys.modules[inspect.currentframe().f_back.f_globals['__name__']] - - if default is GetAttrShim.NO_DEFAULT: - reduction = (getattr, (object, name)) - else: - reduction = (getattr, (object, name, default)) - super(GetAttrShim, self).__init__(reduction) + if default is Getattr.NO_DEFAULT: + reduction = (getattr, (object, name)) + else: + reduction = (getattr, (object, name, default)) + + return Reduce(*reduction, is_callable=callable(default)) - self.object = object - self.name = name - self.default = default - @classmethod - def _callable(cls, object, name, default=NO_DEFAULT): - return callable(default) +Getattr.NO_DEFAULT = __NO_DEFAULT +del __NO_DEFAULT -def _assign_to_dill_module(func): - _dill.__dict__[func.__name__] = func - return func +def assign_to(module, name=None): + def decorator(func): + if name is None: + fname = func.__name__ + else: + fname = name + module.__dict__[fname] = func + func.__module__ = module.__name__ + return func + return decorator ###################### ## Compatibility Shims are defined below ###################### -# Used to stay compatible with versions of dill whose _create_cell functions -# do not have a default value. -# Can be safely replaced removed entirely (replaced by empty tuples for calls to -# _create_cell) once breaking changes are allowed. -if _dill.HAS_CTYPES and not _dill.PY3: - _dill._CELL_EMPTY = _dill.Sentinel('_CELL_EMPTY') -_CELL_EMPTY = GetAttrShim(_dill, '_CELL_EMPTY', None) -_dill._CELL_REF = None - +_CELL_EMPTY = Getattr(_dill, '_CELL_EMPTY', None) if _dill.OLD37: if _dill.HAS_CTYPES and hasattr(_dill.ctypes, 'pythonapi') and hasattr(_dill.ctypes.pythonapi, 'PyCell_Set'): @@ -143,7 +160,7 @@ def _assign_to_dill_module(func): _PyCell_Set = ctypes.pythonapi.PyCell_Set - @_assign_to_dill_module + @assign_to(_dill) def _setattr(object, name, value): if type(object) is _dill.CellType and name == 'cell_contents': _PyCell_Set.argtypes = (ctypes.py_object, ctypes.py_object) @@ -151,7 +168,7 @@ def _setattr(object, name, value): else: setattr(object, name, value) - @_assign_to_dill_module + @assign_to(_dill) def _delattr(object, name): if type(object) is _dill.CellType and name == 'cell_contents': _PyCell_Set.argtypes = (ctypes.py_object, ctypes.c_void_p) @@ -175,7 +192,7 @@ def cell_setter(value): func(value) else: setattr(cell, name, value)''' % __nonlocal) - _assign_to_dill_module(_setattr) + assign_to(_dill)(_setattr) exec('''def _delattr(cell, name): if type(cell) is _dill.CellType and name == 'cell_contents': @@ -186,31 +203,57 @@ def cell_deleter(): func() else: delattr(cell, name)''' % __nonlocal) - _assign_to_dill_module(_delattr) + assign_to(_dill)(_delattr) else: # Likely PyPy 2.7. Simulate the nonlocal keyword with bytecode # manipulation. - from . import _nonlocals - @_assign_to_dill_module - @_nonlocals.export_nonlocals('cellv') + + # The following function is based on 'cell_set' from 'cloudpickle' + # https://github.com/cloudpipe/cloudpickle/blob/5d89947288a18029672596a4d719093cc6d5a412/cloudpickle/cloudpickle.py#L393-L482 + # Copyright (c) 2012, Regents of the University of California. + # Copyright (c) 2009 `PiCloud, Inc. `_. + # License: https://github.com/cloudpipe/cloudpickle/blob/master/LICENSE + @assign_to(_dill) def _setattr(cell, name, value): if type(cell) is _dill.CellType and name == 'cell_contents': - cellv = None - @_nonlocals.nonlocals('cellv', closure_override=(cell,)) - def cell_setter(value): - cellv = value # pylint: disable=unused-variable - cell_setter(value) + _cell_set = _dill.FunctionType( + _cell_set_template_code, {}, '_cell_set', (), (cell,),) + _cell_set(value) else: setattr(cell, name, value) - @_assign_to_dill_module + def _cell_set_factory(value): + lambda: cell + cell = value + + co = _cell_set_factory.__code__ + + _cell_set_template_code = _dill.CodeType( + co.co_argcount, + co.co_nlocals, + co.co_stacksize, + co.co_flags, + co.co_code, + co.co_consts, + co.co_names, + co.co_varnames, + co.co_filename, + co.co_name, + co.co_firstlineno, + co.co_lnotab, + co.co_cellvars, # co_freevars is initialized with co_cellvars + (), # co_cellvars is made empty + ) + + del co + + @assign_to(_dill) def _delattr(cell, name): if type(cell) is _dill.CellType and name == 'cell_contents': pass else: delattr(cell, name) - -_setattr = GetAttrShim(_dill, '_setattr', setattr) -_delattr = GetAttrShim(_dill, '_delattr', delattr) +_setattr = Getattr(_dill, '_setattr', setattr) +_delattr = Getattr(_dill, '_delattr', delattr) From c6ea843ead3d24e1409212ca5a48a39a73915313 Mon Sep 17 00:00:00 2001 From: anivegesana Date: Fri, 31 Dec 2021 13:12:09 -0500 Subject: [PATCH 41/46] Rename cell_stack back to postproc It is more flexible and allows for future plans in going to cPickle. --- dill/_dill.py | 99 ++++++++++++++++++++++++++++---------------------- dill/_shims.py | 21 ++++++----- 2 files changed, 68 insertions(+), 52 deletions(-) diff --git a/dill/_dill.py b/dill/_dill.py index a679a7fb..6072e872 100644 --- a/dill/_dill.py +++ b/dill/_dill.py @@ -286,6 +286,7 @@ def __reduce_ex__(self, protocol): return self.name from . import _shims +from ._shims import Reduce, Getattr ### File modes #: Pickles the file handle, preserving mode. The position of the unpickled @@ -487,28 +488,29 @@ def __missing__(self, key): raise KeyError() -def _enter_recursive_cell_stack(pickler, obj, is_pickler_dill=None): +def _save_with_postproc(pickler, reduction, is_pickler_dill=None, obj=Getattr.NO_DEFAULT): + if obj is Getattr.NO_DEFAULT: + obj = Reduce(reduction) + if is_pickler_dill is None: is_pickler_dill = is_dill(pickler, child=True) if is_pickler_dill: - # assert id(obj) not in pickler._recursive_cells, str(obj) + ' already pushed on stack!' + # assert id(obj) not in pickler._postproc, str(obj) + ' already pushed on stack!' # if not hasattr(pickler, 'x'): pickler.x = 0 # print(pickler.x*' ', 'push', obj, id(obj), pickler._recurse) # pickler.x += 1 l = [] - pickler._recursive_cells[id(obj)] = (len(pickler._recursive_cells), l) - return l + pickler._postproc[id(obj)] = l + + pickler.save_reduce(*reduction, obj=obj) -def _exit_recursive_cell_stack(pickler, obj, is_pickler_dill=None): - if is_pickler_dill is None: - is_pickler_dill = is_dill(pickler, child=True) if is_pickler_dill: # pickler.x -= 1 # print(pickler.x*' ', 'pop', obj, id(obj)) - i, recursive_cells = pickler._recursive_cells.pop(id(obj)) - # assert i == len(pickler._recursive_cells), 'Stack tampered!' - for t in recursive_cells: - pickler.save_reduce(_shims._setattr, (t, 'cell_contents', obj)) + postproc = pickler._postproc.pop(id(obj)) + # assert l == postproc, 'Stack tampered!' + for reduction in postproc: + pickler.save_reduce(*reduction) # pop None created by _setattr off stack if PY3: pickler.write(bytes('0', 'UTF-8')) @@ -535,7 +537,7 @@ def __init__(self, *args, **kwds): self._strictio = False #_strictio self._fmode = settings['fmode'] if _fmode is None else _fmode self._recurse = settings['recurse'] if _recurse is None else _recurse - self._recursive_cells = {} + self._postproc = {} def dump(self, obj): #NOTE: if settings change, need to update attributes # register if the object is a numpy ufunc @@ -931,7 +933,7 @@ def __getattribute__(self, attr): # _CELL_REF and _CELL_EMPTY are used to stay compatible with versions of dill # whose _create_cell functions do not have a default value. -# Can be safely replaced removed entirely (replaced by empty tuples for calls to +# Can be safely removed entirely (replaced by empty tuples for calls to # _create_cell) once breaking changes are allowed. _CELL_REF = None @@ -1384,14 +1386,14 @@ def save_cell(pickler, obj): log.info("# Ce3") return if is_dill(pickler, child=True): - recursive_cells = pickler._recursive_cells.get(id(f)) - if recursive_cells is not None: + postproc = pickler._postproc.get(id(f)) + if postproc is not None: log.info("Ce2: %s" % obj) # _CELL_REF is defined in _shims.py to support older versions of # dill. When breaking changes are made to dill, (_CELL_REF,) can # be replaced by () pickler.save_reduce(_create_cell, (_CELL_REF,), obj=obj) - recursive_cells[1].append(obj) + postproc.append((_shims._setattr, (obj, 'cell_contents', f))) log.info("# Ce2") return log.info("Ce1: %s" % obj) @@ -1573,7 +1575,7 @@ def save_type(pickler, obj): pickler_is_dill = is_dill(pickler, child=True) if issubclass(type(obj), type): # try: # used when pickling the class as code (or the interpreter) - if pickler_is_dill and not pickler._byref and id(obj) not in pickler._recursive_cells: + if pickler_is_dill and not pickler._byref and id(obj) not in pickler._postproc: # thanks to Tom Stepleton pointing out pickler._session unneeded _t = 'T2' log.info("%s: %s" % (_t, obj)) @@ -1595,10 +1597,9 @@ def save_type(pickler, obj): for name in _dict.get("__slots__", []): del _dict[name] name = getattr(obj, "__qualname__", obj.__name__) - _enter_recursive_cell_stack(pickler, obj, pickler_is_dill) - pickler.save_reduce(_create_type, (type(obj), name, - obj.__bases__, _dict), obj=obj) - _exit_recursive_cell_stack(pickler, obj, pickler_is_dill) + _save_with_postproc(pickler, (_create_type, ( + type(obj), name, obj.__bases__, _dict + )), pickler_is_dill, obj=obj) log.info("# %s" % _t) # special cases: NoneType, NotImplementedType, EllipsisType elif obj is type(None): @@ -1652,10 +1653,10 @@ def save_classmethod(pickler, obj): def save_function(pickler, obj): if not _locate_function(obj): #, pickler._session): log.info("F1: %s" % obj) - _recursive_cells = getattr(pickler, '_recursive_cells', ()) + _postproc = getattr(pickler, '_postproc', ()) _byref = getattr(pickler, '_byref', None) _recurse = getattr(pickler, '_recurse', None) - _memo = (id(obj) in _recursive_cells) and (_recurse is not None) + _memo = (id(obj) in _postproc) and (_recurse is not None) if _recurse and not _memo: # recurse to get all globals referred to by obj from .detect import globalvars @@ -1664,28 +1665,40 @@ def save_function(pickler, obj): globs = obj.__globals__ if PY3 else obj.func_globals if _memo: pickler._recurse = False - else: - _enter_recursive_cell_stack(pickler, obj) - if PY3: - #NOTE: workaround for 'super' (see issue #75) - _super = ('super' in getattr(obj.__code__,'co_names',())) and (_byref is not None) - fkwdefaults = getattr(obj, '__kwdefaults__', None) - pickler.save_reduce(_create_function, (obj.__code__, - globs, obj.__name__, - obj.__defaults__, obj.__closure__, - obj.__dict__, fkwdefaults), obj=obj) - else: - _super = ('super' in getattr(obj.func_code,'co_names',())) and (_byref is not None) and getattr(pickler, '_recurse', False) - if _super: pickler._byref = True - pickler.save_reduce(_create_function, (obj.func_code, - globs, obj.func_name, - obj.func_defaults, obj.func_closure, - obj.__dict__), obj=obj) - if _super: pickler._byref = _byref - if _memo: + if PY3: + #NOTE: workaround for 'super' (see issue #75) + _super = ('super' in getattr(obj.__code__,'co_names',())) and (_byref is not None) + fkwdefaults = getattr(obj, '__kwdefaults__', None) + pickler.save_reduce(_create_function, (obj.__code__, + globs, obj.__name__, + obj.__defaults__, obj.__closure__, + obj.__dict__, fkwdefaults), obj=obj) + else: + _super = ('super' in getattr(obj.func_code,'co_names',())) and (_byref is not None) and getattr(pickler, '_recurse', False) + if _super: pickler._byref = True + pickler.save_reduce(_create_function, (obj.func_code, + globs, obj.func_name, + obj.func_defaults, obj.func_closure, + obj.__dict__), obj=obj) + if _super: pickler._byref = _byref pickler._recurse = _recurse else: - _exit_recursive_cell_stack(pickler, obj) + if PY3: + #NOTE: workaround for 'super' (see issue #75) + _super = ('super' in getattr(obj.__code__,'co_names',())) and (_byref is not None) + fkwdefaults = getattr(obj, '__kwdefaults__', None) + _save_with_postproc(pickler, (_create_function, ( + obj.__code__, globs, obj.__name__, obj.__defaults__, + obj.__closure__, obj.__dict__, fkwdefaults + )), obj=obj) + else: + _super = ('super' in getattr(obj.func_code,'co_names',())) and (_byref is not None) and getattr(pickler, '_recurse', False) + if _super: pickler._byref = True + _save_with_postproc(pickler, (_create_function, ( + obj.func_code, globs, obj.func_name, obj.func_defaults, + obj.func_closure, obj.__dict__ + )), obj=obj) + if _super: pickler._byref = _byref #clear = (_byref, _super, _recurse, _memo) #print(clear + (OLDER,)) #NOTE: workaround for #234; "partial" still is problematic for recurse diff --git a/dill/_shims.py b/dill/_shims.py index 89ef159f..c0cdf413 100644 --- a/dill/_shims.py +++ b/dill/_shims.py @@ -16,7 +16,7 @@ Assume that we were transitioning _import_module in _dill.py to the builtin function importlib.import_module when present. -@assign_to(_dill) +@move_to(_dill) def _import_module(import_name): ... # code already in _dill.py @@ -31,7 +31,7 @@ def _import_module(import_name): read-only in Python 3.6 and writable and deletable in 3.7. if _dill.OLD37 and _dill.HAS_CTYPES and ...: - @assign_to(_dill) + @move_to(_dill) def _setattr(object, name, value): if type(object) is _dill.CellType and name == 'cell_contents': _PyCell_Set.argtypes = (ctypes.py_object, ctypes.py_object) @@ -66,6 +66,7 @@ class Reduce(object): pickler.save_reduce(*reduction, obj=reduction) """ + __slots__ = ['reduction'] def __new__(cls, *reduction, **kwargs): """ Args: @@ -83,6 +84,8 @@ def __new__(cls, *reduction, **kwargs): self = object.__new__(Reduce) self.reduction = reduction return self + def __repr__(self): + return 'Reduce%s' % (self.reduction,) def __copy__(self): return self # pragma: no cover def __deepcopy__(self, memo): @@ -136,7 +139,7 @@ def Getattr(object, name, default=__NO_DEFAULT): Getattr.NO_DEFAULT = __NO_DEFAULT del __NO_DEFAULT -def assign_to(module, name=None): +def move_to(module, name=None): def decorator(func): if name is None: fname = func.__name__ @@ -160,7 +163,7 @@ def decorator(func): _PyCell_Set = ctypes.pythonapi.PyCell_Set - @assign_to(_dill) + @move_to(_dill) def _setattr(object, name, value): if type(object) is _dill.CellType and name == 'cell_contents': _PyCell_Set.argtypes = (ctypes.py_object, ctypes.py_object) @@ -168,7 +171,7 @@ def _setattr(object, name, value): else: setattr(object, name, value) - @assign_to(_dill) + @move_to(_dill) def _delattr(object, name): if type(object) is _dill.CellType and name == 'cell_contents': _PyCell_Set.argtypes = (ctypes.py_object, ctypes.c_void_p) @@ -192,7 +195,7 @@ def cell_setter(value): func(value) else: setattr(cell, name, value)''' % __nonlocal) - assign_to(_dill)(_setattr) + move_to(_dill)(_setattr) exec('''def _delattr(cell, name): if type(cell) is _dill.CellType and name == 'cell_contents': @@ -203,7 +206,7 @@ def cell_deleter(): func() else: delattr(cell, name)''' % __nonlocal) - assign_to(_dill)(_delattr) + move_to(_dill)(_delattr) else: # Likely PyPy 2.7. Simulate the nonlocal keyword with bytecode @@ -214,7 +217,7 @@ def cell_deleter(): # Copyright (c) 2012, Regents of the University of California. # Copyright (c) 2009 `PiCloud, Inc. `_. # License: https://github.com/cloudpipe/cloudpickle/blob/master/LICENSE - @assign_to(_dill) + @move_to(_dill) def _setattr(cell, name, value): if type(cell) is _dill.CellType and name == 'cell_contents': _cell_set = _dill.FunctionType( @@ -248,7 +251,7 @@ def _cell_set_factory(value): del co - @assign_to(_dill) + @move_to(_dill) def _delattr(cell, name): if type(cell) is _dill.CellType and name == 'cell_contents': pass From 89c1487d668f8751c3fddd0069fd5147b8057b8f Mon Sep 17 00:00:00 2001 From: anivegesana Date: Fri, 31 Dec 2021 17:52:58 -0500 Subject: [PATCH 42/46] Add _CELL_EMPTY to Python 3 in case it makes cPickle implementation easier --- dill/_dill.py | 14 ++++++++------ dill/_shims.py | 4 ++++ 2 files changed, 12 insertions(+), 6 deletions(-) diff --git a/dill/_dill.py b/dill/_dill.py index 6072e872..bd62e5f3 100644 --- a/dill/_dill.py +++ b/dill/_dill.py @@ -933,17 +933,18 @@ def __getattribute__(self, attr): # _CELL_REF and _CELL_EMPTY are used to stay compatible with versions of dill # whose _create_cell functions do not have a default value. -# Can be safely removed entirely (replaced by empty tuples for calls to -# _create_cell) once breaking changes are allowed. +# _CELL_REF can be safely removed entirely (replaced by empty tuples for calls +# to _create_cell) once breaking changes are allowed. _CELL_REF = None +_CELL_EMPTY = Sentinel('_CELL_EMPTY') if PY3: def _create_cell(contents=None): - return (lambda: contents).__closure__[0] + if contents is not _CELL_EMPTY: + value = contents + return (lambda: value).__closure__[0] else: - _CELL_EMPTY = Sentinel('_CELL_EMPTY') - def _create_cell(contents=None): if contents is not _CELL_EMPTY: value = contents @@ -1373,7 +1374,8 @@ def save_cell(pickler, obj): # will instead unpickle to None if unpickled in Python 3. # When breaking changes are made to dill, (_shims._CELL_EMPTY,) can - # be replaced by () + # be replaced by () OR the delattr function can be removed repending on + # whichever is more convienient. pickler.save_reduce(_create_cell, (_shims._CELL_EMPTY,), obj=obj) # Call the function _delattr on the cell's cell_contents attribute # The result of this function call will be None diff --git a/dill/_shims.py b/dill/_shims.py index c0cdf413..ac20eca3 100644 --- a/dill/_shims.py +++ b/dill/_shims.py @@ -199,6 +199,10 @@ def cell_setter(value): exec('''def _delattr(cell, name): if type(cell) is _dill.CellType and name == 'cell_contents': + try: + cell.cell_contents + except: + return def cell_deleter(): %s del cell # pylint: disable=unused-variable From bd350b6744182fa9ba42ac55575836578f7fb5a5 Mon Sep 17 00:00:00 2001 From: anivegesana Date: Wed, 5 Jan 2022 00:47:26 -0500 Subject: [PATCH 43/46] Add postproc_list to _save_with_postproc --- dill/_dill.py | 132 ++++++++++++++++++++++------------------- tests/test_selected.py | 2 +- 2 files changed, 72 insertions(+), 62 deletions(-) diff --git a/dill/_dill.py b/dill/_dill.py index bd62e5f3..919bbd38 100644 --- a/dill/_dill.py +++ b/dill/_dill.py @@ -28,6 +28,7 @@ def _trace(boolean): if boolean: log.setLevel(logging.INFO) else: log.setLevel(logging.WARN) return +import warnings import os import sys @@ -488,7 +489,7 @@ def __missing__(self, key): raise KeyError() -def _save_with_postproc(pickler, reduction, is_pickler_dill=None, obj=Getattr.NO_DEFAULT): +def _save_with_postproc(pickler, reduction, is_pickler_dill=None, obj=Getattr.NO_DEFAULT, postproc_list=None): if obj is Getattr.NO_DEFAULT: obj = Reduce(reduction) @@ -499,17 +500,19 @@ def _save_with_postproc(pickler, reduction, is_pickler_dill=None, obj=Getattr.NO # if not hasattr(pickler, 'x'): pickler.x = 0 # print(pickler.x*' ', 'push', obj, id(obj), pickler._recurse) # pickler.x += 1 - l = [] - pickler._postproc[id(obj)] = l + if postproc_list is None: + postproc_list = [] + pickler._postproc[id(obj)] = postproc_list + # TODO: Use state_setter in Python 3.8 to allow for faster cPickle implementations pickler.save_reduce(*reduction, obj=obj) if is_pickler_dill: # pickler.x -= 1 # print(pickler.x*' ', 'pop', obj, id(obj)) postproc = pickler._postproc.pop(id(obj)) - # assert l == postproc, 'Stack tampered!' - for reduction in postproc: + # assert postproc_list == postproc, 'Stack tampered!' + for reduction in reversed(postproc): pickler.save_reduce(*reduction) # pop None created by _setattr off stack if PY3: @@ -735,6 +738,7 @@ def _create_function(fcode, fglobals, fname=None, fdefaults=None, # thus we need to make sure that we have __builtins__ as well if "__builtins__" not in func.__globals__: func.__globals__["__builtins__"] = globals()["__builtins__"] + # assert id(fglobals) == id(func.__globals__) return func def _create_code(*args): @@ -1347,6 +1351,10 @@ def save_wrapper_descriptor(pickler, obj): @register(MethodWrapperType) def save_instancemethod(pickler, obj): log.info("Mw: %s" % obj) + if IS_PYPY2 and obj.__self__ is None and obj.im_class: + # Can be a class method in PYPY2 if __self__ is none + pickler.save_reduce(getattr, (obj.im_class, obj.__name__), obj=obj) + return pickler.save_reduce(getattr, (obj.__self__, obj.__name__), obj=obj) log.info("# Mw") return @@ -1394,8 +1402,8 @@ def save_cell(pickler, obj): # _CELL_REF is defined in _shims.py to support older versions of # dill. When breaking changes are made to dill, (_CELL_REF,) can # be replaced by () - pickler.save_reduce(_create_cell, (_CELL_REF,), obj=obj) postproc.append((_shims._setattr, (obj, 'cell_contents', f))) + pickler.save_reduce(_create_cell, (_CELL_REF,), obj=obj) log.info("# Ce2") return log.info("Ce1: %s" % obj) @@ -1562,7 +1570,7 @@ def save_module(pickler, obj): return @register(TypeType) -def save_type(pickler, obj): +def save_type(pickler, obj, postproc_list=None): if obj in _typemap: log.info("T1: %s" % obj) pickler.save_reduce(_load_type, (_typemap[obj],), obj=obj) @@ -1573,8 +1581,27 @@ def save_type(pickler, obj): pickler.save_reduce(_create_namedtuple, (getattr(obj, "__qualname__", obj.__name__), obj._fields, obj.__module__), obj=obj) log.info("# T6") return - elif obj.__module__ == '__main__': + # special cases: NoneType, NotImplementedType, EllipsisType + elif obj is type(None): + log.info("T7: %s" % obj) + #XXX: pickler.save_reduce(type, (None,), obj=obj) + if PY3: + pickler.write(bytes('c__builtin__\nNoneType\n', 'UTF-8')) + else: + pickler.write('c__builtin__\nNoneType\n') + log.info("# T7") + elif obj is NotImplementedType: + log.info("T7: %s" % obj) + pickler.save_reduce(type, (NotImplemented,), obj=obj) + log.info("# T7") + elif obj is EllipsisType: + log.info("T7: %s" % obj) + pickler.save_reduce(type, (Ellipsis,), obj=obj) + log.info("# T7") + + elif not _locate_function(obj): # not a function, but the name was held over pickler_is_dill = is_dill(pickler, child=True) + # assert id(obj) not in pickler._postproc if issubclass(type(obj), type): # try: # used when pickling the class as code (or the interpreter) if pickler_is_dill and not pickler._byref and id(obj) not in pickler._postproc: @@ -1584,6 +1611,7 @@ def save_type(pickler, obj): _dict = _dict_from_dictproxy(obj.__dict__) # except: # punt to StockPickler (pickle by class reference) else: + warnings.warn('The byref setting is on, but %s cannot be located.' % (obj,), RuntimeWarning) log.info("T5: %s" % obj) name = getattr(obj, '__qualname__', getattr(obj, '__name__', None)) StockPickler.save_global(pickler, obj, name=name) @@ -1601,25 +1629,8 @@ def save_type(pickler, obj): name = getattr(obj, "__qualname__", obj.__name__) _save_with_postproc(pickler, (_create_type, ( type(obj), name, obj.__bases__, _dict - )), pickler_is_dill, obj=obj) + )), pickler_is_dill, obj=obj, postproc_list=postproc_list) log.info("# %s" % _t) - # special cases: NoneType, NotImplementedType, EllipsisType - elif obj is type(None): - log.info("T7: %s" % obj) - #XXX: pickler.save_reduce(type, (None,), obj=obj) - if PY3: - pickler.write(bytes('c__builtin__\nNoneType\n', 'UTF-8')) - else: - pickler.write('c__builtin__\nNoneType\n') - log.info("# T7") - elif obj is NotImplementedType: - log.info("T7: %s" % obj) - pickler.save_reduce(type, (NotImplemented,), obj=obj) - log.info("# T7") - elif obj is EllipsisType: - log.info("T7: %s" % obj) - pickler.save_reduce(type, (Ellipsis,), obj=obj) - log.info("# T7") else: log.info("T4: %s" % obj) #print (obj.__dict__) @@ -1630,6 +1641,12 @@ def save_type(pickler, obj): log.info("# T4") return +# Error in PyPy 2.7 when adding ABC support +if IS_PYPY2: + @register(FrameType) + def save_frame(pickler, obj): + raise PicklingError('Cannot pickle a Python stack frame') + @register(property) def save_property(pickler, obj): log.info("Pr: %s" % obj) @@ -1665,42 +1682,36 @@ def save_function(pickler, obj): globs = globalvars(obj, recurse=True, builtin=True) else: globs = obj.__globals__ if PY3 else obj.func_globals - if _memo: - pickler._recurse = False - if PY3: - #NOTE: workaround for 'super' (see issue #75) - _super = ('super' in getattr(obj.__code__,'co_names',())) and (_byref is not None) - fkwdefaults = getattr(obj, '__kwdefaults__', None) - pickler.save_reduce(_create_function, (obj.__code__, - globs, obj.__name__, - obj.__defaults__, obj.__closure__, - obj.__dict__, fkwdefaults), obj=obj) + postproc_list = [] + if _recurse: + if id(obj.__dict__) in pickler.memo: + members = obj.__dict__ else: - _super = ('super' in getattr(obj.func_code,'co_names',())) and (_byref is not None) and getattr(pickler, '_recurse', False) - if _super: pickler._byref = True - pickler.save_reduce(_create_function, (obj.func_code, - globs, obj.func_name, - obj.func_defaults, obj.func_closure, - obj.__dict__), obj=obj) - if _super: pickler._byref = _byref - pickler._recurse = _recurse + postproc_list.append((setattr, (obj, '__dict__', obj.__dict__))) + members = None + if id(globs) not in pickler.memo: + globs_old = globs + globs = {'__name__': obj.__module__} + if type(globs_old) is not dict: + globs = dict.__new__(type(globs_old), globs) + # Code from Python 2 cannot be transfered to Python 3 anyway because of bytecode incompatibility, so this good + postproc_list.append((dict.update, (globs, globs_old))) else: - if PY3: - #NOTE: workaround for 'super' (see issue #75) - _super = ('super' in getattr(obj.__code__,'co_names',())) and (_byref is not None) - fkwdefaults = getattr(obj, '__kwdefaults__', None) - _save_with_postproc(pickler, (_create_function, ( - obj.__code__, globs, obj.__name__, obj.__defaults__, - obj.__closure__, obj.__dict__, fkwdefaults - )), obj=obj) - else: - _super = ('super' in getattr(obj.func_code,'co_names',())) and (_byref is not None) and getattr(pickler, '_recurse', False) - if _super: pickler._byref = True - _save_with_postproc(pickler, (_create_function, ( - obj.func_code, globs, obj.func_name, obj.func_defaults, - obj.func_closure, obj.__dict__ - )), obj=obj) - if _super: pickler._byref = _byref + members = obj.__dict__ + if PY3: + #NOTE: workaround for 'super' (see issue #75) + _super = ('super' in getattr(obj.__code__,'co_names',())) and (_byref is not None) + fkwdefaults = getattr(obj, '__kwdefaults__', None) + _save_with_postproc(pickler, (_create_function, ( + obj.__code__, globs, obj.__name__, obj.__defaults__, + obj.__closure__, members, fkwdefaults + )), obj=obj, postproc_list=postproc_list) #, delayed_objs=(globs) + else: + _super = ('super' in getattr(obj.func_code,'co_names',())) and (_byref is not None) and getattr(pickler, '_recurse', False) + _save_with_postproc(pickler, (_create_function, ( + obj.func_code, globs, obj.func_name, obj.func_defaults, + obj.func_closure, members + )), obj=obj, postproc_list=postproc_list) #clear = (_byref, _super, _recurse, _memo) #print(clear + (OLDER,)) #NOTE: workaround for #234; "partial" still is problematic for recurse @@ -1739,7 +1750,6 @@ def pickles(obj,exact=False,safe=False,**kwds): #FIXME: should be "(pik == obj).all()" for numpy comparison, though that'll fail if shapes differ result = bool(pik.all() == obj.all()) except AttributeError: - import warnings warnings.filterwarnings('ignore') result = pik == obj warnings.resetwarnings() diff --git a/tests/test_selected.py b/tests/test_selected.py index a3bf7487..ef2b9f7e 100644 --- a/tests/test_selected.py +++ b/tests/test_selected.py @@ -79,7 +79,7 @@ def test_frame_related(): _is = lambda ok: not ok if dill._dill.IS_PYPY2 else ok ok = dill.pickles(f) if verbose: print ("%s: %s, %s" % (ok, type(f), f)) - assert _is(not ok) #XXX: dill fails + assert not ok ok = dill.pickles(g) if verbose: print ("%s: %s, %s" % (ok, type(g), g)) assert _is(not ok) #XXX: dill fails From 181aa4ad6ca89c711a07e6d5d976bad7e8f13b60 Mon Sep 17 00:00:00 2001 From: anivegesana Date: Mon, 10 Jan 2022 14:13:30 -0500 Subject: [PATCH 44/46] Recursive functions and warnings --- dill/__init__.py | 2 +- dill/_dill.py | 240 +++++++++++++++++++++++----------------- tests/test_classdef.py | 6 +- tests/test_recursive.py | 102 ++++++++++++----- 4 files changed, 220 insertions(+), 130 deletions(-) diff --git a/dill/__init__.py b/dill/__init__.py index 1cf886ce..b13accfb 100644 --- a/dill/__init__.py +++ b/dill/__init__.py @@ -25,7 +25,7 @@ from ._dill import dump, dumps, load, loads, dump_session, load_session, \ Pickler, Unpickler, register, copy, pickle, pickles, check, \ HIGHEST_PROTOCOL, DEFAULT_PROTOCOL, PicklingError, UnpicklingError, \ - HANDLE_FMODE, CONTENTS_FMODE, FILE_FMODE + HANDLE_FMODE, CONTENTS_FMODE, FILE_FMODE, PicklingWarning, UnpicklingWarning from . import source, temp, detect # get global settings diff --git a/dill/_dill.py b/dill/_dill.py index 919bbd38..cb17cae4 100644 --- a/dill/_dill.py +++ b/dill/_dill.py @@ -72,7 +72,7 @@ def _trace(boolean): GeneratorType, DictProxyType, XRangeType, SliceType, TracebackType, \ NotImplementedType, EllipsisType, FrameType, ModuleType, \ BufferType, BuiltinMethodType, TypeType -from pickle import HIGHEST_PROTOCOL, PicklingError, UnpicklingError +from pickle import HIGHEST_PROTOCOL, PickleError, PicklingError, UnpicklingError try: from pickle import DEFAULT_PROTOCOL except ImportError: @@ -274,7 +274,7 @@ def __init__(self, name, module_name=None): # Use the calling frame's module self.__module__ = inspect.currentframe().f_back.f_globals['__name__'] else: - self.__module__ = module_name + self.__module__ = module_name # pragma: no cover def __repr__(self): return self.__module__ + '.' + self.name # pragma: no cover def __copy__(self): @@ -488,37 +488,14 @@ def __missing__(self, key): else: raise KeyError() +class PickleWarning(Warning, PickleError): + pass -def _save_with_postproc(pickler, reduction, is_pickler_dill=None, obj=Getattr.NO_DEFAULT, postproc_list=None): - if obj is Getattr.NO_DEFAULT: - obj = Reduce(reduction) - - if is_pickler_dill is None: - is_pickler_dill = is_dill(pickler, child=True) - if is_pickler_dill: - # assert id(obj) not in pickler._postproc, str(obj) + ' already pushed on stack!' - # if not hasattr(pickler, 'x'): pickler.x = 0 - # print(pickler.x*' ', 'push', obj, id(obj), pickler._recurse) - # pickler.x += 1 - if postproc_list is None: - postproc_list = [] - pickler._postproc[id(obj)] = postproc_list - - # TODO: Use state_setter in Python 3.8 to allow for faster cPickle implementations - pickler.save_reduce(*reduction, obj=obj) +class PicklingWarning(PickleWarning, PicklingError): + pass - if is_pickler_dill: - # pickler.x -= 1 - # print(pickler.x*' ', 'pop', obj, id(obj)) - postproc = pickler._postproc.pop(id(obj)) - # assert postproc_list == postproc, 'Stack tampered!' - for reduction in reversed(postproc): - pickler.save_reduce(*reduction) - # pop None created by _setattr off stack - if PY3: - pickler.write(bytes('0', 'UTF-8')) - else: - pickler.write('0') +class UnpicklingWarning(PickleWarning, UnpicklingError): + pass ### Extend the Picklers class Pickler(StockPickler): @@ -1049,6 +1026,56 @@ def _locate_function(obj, session=False): found = _import_module(obj.__module__ + '.' + obj.__name__, safe=True) return found is obj + +def _save_with_postproc(pickler, reduction, is_pickler_dill=None, obj=Getattr.NO_DEFAULT, postproc_list=None): + if obj is Getattr.NO_DEFAULT: + obj = Reduce(reduction) # pragma: no cover + + if is_pickler_dill is None: + is_pickler_dill = is_dill(pickler, child=True) + if is_pickler_dill: + # assert id(obj) not in pickler._postproc, str(obj) + ' already pushed on stack!' + # if not hasattr(pickler, 'x'): pickler.x = 0 + # print(pickler.x*' ', 'push', obj, id(obj), pickler._recurse) + # pickler.x += 1 + if postproc_list is None: + postproc_list = [] + + # Recursive object not supported. Default to a global instead. + if id(obj) in pickler._postproc: + name = '(%s.%s) ' % (obj.__module__, getattr(obj, '__qualname__', obj.__name__)) if hasattr(obj, '__module__') else '' + warnings.warn('%s %shas strange recursive properties. Cannot pickle.' % (obj, name, PicklingWarning)) + pickler.save_global(obj) + return + pickler._postproc[id(obj)] = postproc_list + + # TODO: Use state_setter in Python 3.8 to allow for faster cPickle implementations + pickler.save_reduce(*reduction, obj=obj) + + if is_pickler_dill: + # pickler.x -= 1 + # print(pickler.x*' ', 'pop', obj, id(obj)) + postproc = pickler._postproc.pop(id(obj)) + # assert postproc_list == postproc, 'Stack tampered!' + for reduction in reversed(postproc): + if reduction[0] is dict.update and type(reduction[1][0]) is dict: + # use the internal machinery of pickle.py to speedup when + # updating a dictionary in postproc + dest, source = reduction[1] + if source: + pickler.write(pickler.get(pickler.memo[id(dest)][0])) + pickler._batch_setitems(iter(source.items())) + else: + # Updating with an empty dictionary. Same as doing nothing. + continue + else: + pickler.save_reduce(*reduction) + # pop None created by calling preprocessing step off stack + if PY3: + pickler.write(bytes('0', 'UTF-8')) + else: + pickler.write('0') + #@register(CodeType) #def save_code(pickler, obj): # log.info("Co: %s" % obj) @@ -1560,7 +1587,7 @@ def save_module(pickler, obj): log.info("# M1") elif PY3 and obj.__name__ == "dill._dill": log.info("M2: %s" % obj) - pickler.save_global(obj) + pickler.save_global(obj, name="_dill") log.info("# M2") else: log.info("M2: %s" % obj) @@ -1581,6 +1608,7 @@ def save_type(pickler, obj, postproc_list=None): pickler.save_reduce(_create_namedtuple, (getattr(obj, "__qualname__", obj.__name__), obj._fields, obj.__module__), obj=obj) log.info("# T6") return + # special cases: NoneType, NotImplementedType, EllipsisType elif obj is type(None): log.info("T7: %s" % obj) @@ -1599,46 +1627,40 @@ def save_type(pickler, obj, postproc_list=None): pickler.save_reduce(type, (Ellipsis,), obj=obj) log.info("# T7") - elif not _locate_function(obj): # not a function, but the name was held over - pickler_is_dill = is_dill(pickler, child=True) - # assert id(obj) not in pickler._postproc - if issubclass(type(obj), type): - # try: # used when pickling the class as code (or the interpreter) - if pickler_is_dill and not pickler._byref and id(obj) not in pickler._postproc: + else: + obj_name = getattr(obj, '__qualname__', getattr(obj, '__name__', None)) + _byref = getattr(pickler, '_byref', None) + obj_recursive = id(obj) in getattr(pickler, '_postproc', ()) + if not _byref and not obj_recursive and not _locate_function(obj): # not a function, but the name was held over + if issubclass(type(obj), type): # thanks to Tom Stepleton pointing out pickler._session unneeded _t = 'T2' log.info("%s: %s" % (_t, obj)) _dict = _dict_from_dictproxy(obj.__dict__) - # except: # punt to StockPickler (pickle by class reference) else: - warnings.warn('The byref setting is on, but %s cannot be located.' % (obj,), RuntimeWarning) - log.info("T5: %s" % obj) - name = getattr(obj, '__qualname__', getattr(obj, '__name__', None)) - StockPickler.save_global(pickler, obj, name=name) - log.info("# T5") - return + _t = 'T3' + log.info("%s: %s" % (_t, obj)) + _dict = obj.__dict__ + #print (_dict) + #print ("%s\n%s" % (type(obj), obj.__name__)) + #print ("%s\n%s" % (obj.__bases__, obj.__dict__)) + for name in _dict.get("__slots__", []): + del _dict[name] + _save_with_postproc(pickler, (_create_type, ( + type(obj), obj_name, obj.__bases__, _dict + )), obj=obj, postproc_list=postproc_list) + log.info("# %s" % _t) else: - _t = 'T3' - log.info("%s: %s" % (_t, obj)) - _dict = obj.__dict__ - #print (_dict) - #print ("%s\n%s" % (type(obj), obj.__name__)) - #print ("%s\n%s" % (obj.__bases__, obj.__dict__)) - for name in _dict.get("__slots__", []): - del _dict[name] - name = getattr(obj, "__qualname__", obj.__name__) - _save_with_postproc(pickler, (_create_type, ( - type(obj), name, obj.__bases__, _dict - )), pickler_is_dill, obj=obj, postproc_list=postproc_list) - log.info("# %s" % _t) - else: - log.info("T4: %s" % obj) - #print (obj.__dict__) - #print ("%s\n%s" % (type(obj), obj.__name__)) - #print ("%s\n%s" % (obj.__bases__, obj.__dict__)) - name = getattr(obj, '__qualname__', getattr(obj, '__name__', None)) - StockPickler.save_global(pickler, obj, name=name) - log.info("# T4") + log.info("T4: %s" % obj) + if _byref: + warnings.warn('The byref setting is on, but %s cannot be located.' % (obj,), PicklingWarning) + if obj_recursive: + warnings.warn('%s.__dict__ contains %s. Cannot pickle recursive classes.' % (obj_name, obj), PicklingWarning) + #print (obj.__dict__) + #print ("%s\n%s" % (type(obj), obj.__name__)) + #print ("%s\n%s" % (obj.__bases__, obj.__dict__)) + StockPickler.save_global(pickler, obj, name=obj_name) + log.info("# T4") return # Error in PyPy 2.7 when adding ABC support @@ -1665,60 +1687,78 @@ def save_classmethod(pickler, obj): orig_func = obj.__get__(None, object) if isinstance(obj, classmethod): orig_func = getattr(orig_func, im_func) # Unbind - pickler.save_reduce(type(obj), (orig_func,), obj=obj) + + if PY3: + if type(obj.__dict__) is dict: + if obj.__dict__: + state = obj.__dict__ + else: + state = None + else: + state = (None, {'__dict__', obj.__dict__}) + else: + state = None + + pickler.save_reduce(type(obj), (orig_func,), state, obj=obj) log.info("# Cm") @register(FunctionType) def save_function(pickler, obj): + # obj_recursive = id(obj) in getattr(pickler, '_postproc', ()) + # assert not obj_recursive, '%s has a bizarre structure. Open an issue.' % (obj,) if not _locate_function(obj): #, pickler._session): log.info("F1: %s" % obj) - _postproc = getattr(pickler, '_postproc', ()) - _byref = getattr(pickler, '_byref', None) _recurse = getattr(pickler, '_recurse', None) - _memo = (id(obj) in _postproc) and (_recurse is not None) - if _recurse and not _memo: + _byref = getattr(pickler, '_byref', None) + _postproc = getattr(pickler, '_postproc', None) + postproc_list = [] + if _recurse: # recurse to get all globals referred to by obj from .detect import globalvars - globs = globalvars(obj, recurse=True, builtin=True) + globs_copy = globalvars(obj, recurse=True, builtin=True) + + # Add the name of the module to the globs dictionary to prevent + # the duplication of the dictionary. Pickle the unpopulated + # globals dictionary and set the remaining items after the function + # is created to correctly handle recursion. + globs = {'__name__': obj.__module__} else: - globs = obj.__globals__ if PY3 else obj.func_globals - postproc_list = [] - if _recurse: - if id(obj.__dict__) in pickler.memo: - members = obj.__dict__ + globs_copy = obj.__globals__ if PY3 else obj.func_globals + + # If the globals is a module __dict__, do not save it in the pickle. + if globs_copy is not None and obj.__module__ is not None and \ + getattr(_import_module(obj.__module__, True), '__dict__', None) is globs_copy: + globs = globs_copy else: - postproc_list.append((setattr, (obj, '__dict__', obj.__dict__))) - members = None - if id(globs) not in pickler.memo: - globs_old = globs globs = {'__name__': obj.__module__} - if type(globs_old) is not dict: - globs = dict.__new__(type(globs_old), globs) - # Code from Python 2 cannot be transfered to Python 3 anyway because of bytecode incompatibility, so this good - postproc_list.append((dict.update, (globs, globs_old))) - else: - members = obj.__dict__ + + if globs_copy is not None and globs is not globs_copy: + # In the case that the globals are copied, we need to ensure that + # the globals dictionary is updated when all objects in the + # dictionary are already created. + if PY3: + glob_ids = {id(g) for g in globs_copy.values()} + else: + glob_ids = {id(g) for g in globs_copy.itervalues()} + for stack_element in _postproc: + if stack_element in glob_ids: + _postproc[stack_element].append((dict.update, (globs, globs_copy))) + break + else: + postproc_list.append((dict.update, (globs, globs_copy))) + if PY3: - #NOTE: workaround for 'super' (see issue #75) - _super = ('super' in getattr(obj.__code__,'co_names',())) and (_byref is not None) + #NOTE: workaround for 'super' (see issue #75) removed in #443 fkwdefaults = getattr(obj, '__kwdefaults__', None) _save_with_postproc(pickler, (_create_function, ( obj.__code__, globs, obj.__name__, obj.__defaults__, - obj.__closure__, members, fkwdefaults - )), obj=obj, postproc_list=postproc_list) #, delayed_objs=(globs) + obj.__closure__, obj.__dict__, fkwdefaults + )), obj=obj, postproc_list=postproc_list) else: - _super = ('super' in getattr(obj.func_code,'co_names',())) and (_byref is not None) and getattr(pickler, '_recurse', False) _save_with_postproc(pickler, (_create_function, ( obj.func_code, globs, obj.func_name, obj.func_defaults, - obj.func_closure, members + obj.func_closure, obj.__dict__ )), obj=obj, postproc_list=postproc_list) - #clear = (_byref, _super, _recurse, _memo) - #print(clear + (OLDER,)) - #NOTE: workaround for #234; "partial" still is problematic for recurse - if OLDER and not _byref and (_super or (not _super and _memo) or (not _super and not _memo and _recurse)): pickler.clear_memo() - #if _memo: - # #pickler.clear_memo() - # #StockPickler.clear_memo(pickler) log.info("# F1") else: log.info("F2: %s" % obj) diff --git a/tests/test_classdef.py b/tests/test_classdef.py index 3b2442e9..5f07be5e 100644 --- a/tests/test_classdef.py +++ b/tests/test_classdef.py @@ -85,8 +85,10 @@ def test_class_objects(): assert type(_cls).__name__ == "_meta" # test NoneType -def test_none(): +def test_specialtypes(): assert dill.pickles(type(None)) + assert dill.pickles(type(NotImplemented)) + assert dill.pickles(type(Ellipsis)) if hex(sys.hexversion) >= '0x20600f0': from collections import namedtuple @@ -204,7 +206,7 @@ def test_slots(): if __name__ == '__main__': test_class_instances() test_class_objects() - test_none() + test_specialtypes() test_namedtuple() test_dtype() test_array_nested() diff --git a/tests/test_recursive.py b/tests/test_recursive.py index ecd4536f..a042385f 100644 --- a/tests/test_recursive.py +++ b/tests/test_recursive.py @@ -8,6 +8,25 @@ import dill from dill._dill import PY3 from functools import partial +import warnings + + +def copy(obj, byref=False, recurse=False): + if byref: + try: + return dill.copy(obj, byref=byref, recurse=recurse) + except: + pass + else: + raise AssertionError('Copy of %s with byref=True should have given a warning!' % (obj,)) + + warnings.simplefilter('ignore') + val = dill.copy(obj, byref=byref, recurse=recurse) + warnings.simplefilter('error') + return val + else: + return dill.copy(obj, byref=byref, recurse=recurse) + class obj1(object): def __init__(self): @@ -24,20 +43,20 @@ def __init__(self): def test_super(): - assert dill.copy(obj1(), byref=True) - assert dill.copy(obj1(), byref=True, recurse=True) - assert dill.copy(obj1(), recurse=True) - assert dill.copy(obj1()) + assert copy(obj1(), byref=True) + assert copy(obj1(), byref=True, recurse=True) + assert copy(obj1(), recurse=True) + assert copy(obj1()) - assert dill.copy(obj2(), byref=True) - assert dill.copy(obj2(), byref=True, recurse=True) - assert dill.copy(obj2(), recurse=True) - assert dill.copy(obj2()) + assert copy(obj2(), byref=True) + assert copy(obj2(), byref=True, recurse=True) + assert copy(obj2(), recurse=True) + assert copy(obj2()) - assert dill.copy(obj3(), byref=True) - assert dill.copy(obj3(), byref=True, recurse=True) - assert dill.copy(obj3(), recurse=True) - assert dill.copy(obj3()) + assert copy(obj3(), byref=True) + assert copy(obj3(), byref=True, recurse=True) + assert copy(obj3(), recurse=True) + assert copy(obj3()) def get_trigger(model): @@ -55,10 +74,10 @@ class Model(object): def test_partial(): - assert dill.copy(Machine(), byref=True) - assert dill.copy(Machine(), byref=True, recurse=True) - assert dill.copy(Machine(), recurse=True) - assert dill.copy(Machine()) + assert copy(Machine(), byref=True) + assert copy(Machine(), byref=True, recurse=True) + assert copy(Machine(), recurse=True) + assert copy(Machine()) class Machine2(object): @@ -74,10 +93,10 @@ def __init__(self): def test_partials(): - assert dill.copy(SubMachine(), byref=True) - assert dill.copy(SubMachine(), byref=True, recurse=True) - assert dill.copy(SubMachine(), recurse=True) - assert dill.copy(SubMachine()) + assert copy(SubMachine(), byref=True) + assert copy(SubMachine(), byref=True, recurse=True) + assert copy(SubMachine(), recurse=True) + assert copy(SubMachine()) class obj4(object): @@ -92,7 +111,7 @@ def __init__(self): def test_circular_reference(): - assert dill.copy(obj4()) + assert copy(obj4()) obj4_copy = dill.loads(dill.dumps(obj4())) if PY3: assert type(obj4_copy) is type(obj4_copy).__init__.__closure__[0].cell_contents @@ -106,12 +125,41 @@ def g(): def test_function_cells(): - assert dill.copy(f()) + assert copy(f()) + + +def fib(n): + assert n >= 0 + if n <= 1: + return n + else: + return fib(n-1) + fib(n-2) + + +def test_recursive_function(): + global fib + fib2 = copy(fib, recurse=True) + fib3 = copy(fib) + fib4 = fib + del fib + assert fib2(5) == 5 + for _fib in (fib3, fib4): + try: + _fib(5) + except: + # This is expected to fail because fib no longer exists + pass + else: + raise AssertionError("Function fib shouldn't have been found") + fib = fib4 if __name__ == '__main__': - test_super() - test_partial() - test_partials() - test_circular_reference() - test_function_cells() + with warnings.catch_warnings(): + warnings.simplefilter('error') + test_super() + test_partial() + test_partials() + test_circular_reference() + test_function_cells() + test_recursive_function() From a7797cb7cf474c473f9f59dcb4b5452456b46639 Mon Sep 17 00:00:00 2001 From: anivegesana Date: Mon, 24 Jan 2022 16:26:49 -0800 Subject: [PATCH 45/46] Better warning messages --- dill/__init__.py | 3 ++- dill/_dill.py | 19 +++++++++---------- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/dill/__init__.py b/dill/__init__.py index b13accfb..34ac5169 100644 --- a/dill/__init__.py +++ b/dill/__init__.py @@ -25,7 +25,8 @@ from ._dill import dump, dumps, load, loads, dump_session, load_session, \ Pickler, Unpickler, register, copy, pickle, pickles, check, \ HIGHEST_PROTOCOL, DEFAULT_PROTOCOL, PicklingError, UnpicklingError, \ - HANDLE_FMODE, CONTENTS_FMODE, FILE_FMODE, PicklingWarning, UnpicklingWarning + HANDLE_FMODE, CONTENTS_FMODE, FILE_FMODE, PickleError, PickleWarning, \ + PicklingWarning, UnpicklingWarning from . import source, temp, detect # get global settings diff --git a/dill/_dill.py b/dill/_dill.py index cb17cae4..9f92931f 100644 --- a/dill/_dill.py +++ b/dill/_dill.py @@ -18,7 +18,8 @@ __all__ = ['dump','dumps','load','loads','dump_session','load_session', 'Pickler','Unpickler','register','copy','pickle','pickles', 'check','HIGHEST_PROTOCOL','DEFAULT_PROTOCOL','PicklingError', - 'UnpicklingError','HANDLE_FMODE','CONTENTS_FMODE','FILE_FMODE'] + 'UnpicklingError','HANDLE_FMODE','CONTENTS_FMODE','FILE_FMODE', + 'PickleError','PickleWarning','PicklingWarning','UnpicklingWarning'] import logging log = logging.getLogger("dill") @@ -1043,8 +1044,8 @@ def _save_with_postproc(pickler, reduction, is_pickler_dill=None, obj=Getattr.NO # Recursive object not supported. Default to a global instead. if id(obj) in pickler._postproc: - name = '(%s.%s) ' % (obj.__module__, getattr(obj, '__qualname__', obj.__name__)) if hasattr(obj, '__module__') else '' - warnings.warn('%s %shas strange recursive properties. Cannot pickle.' % (obj, name, PicklingWarning)) + name = '%s.%s ' % (obj.__module__, getattr(obj, '__qualname__', obj.__name__)) if hasattr(obj, '__module__') else '' + warnings.warn('Cannot perfectly pickle %r: %shas recursive self-references that would trigger a RecursionError.' % (obj, name, PicklingWarning)) pickler.save_global(obj) return pickler._postproc[id(obj)] = postproc_list @@ -1631,7 +1632,8 @@ def save_type(pickler, obj, postproc_list=None): obj_name = getattr(obj, '__qualname__', getattr(obj, '__name__', None)) _byref = getattr(pickler, '_byref', None) obj_recursive = id(obj) in getattr(pickler, '_postproc', ()) - if not _byref and not obj_recursive and not _locate_function(obj): # not a function, but the name was held over + incorrectly_named = not _locate_function(obj) + if not _byref and not obj_recursive and incorrectly_named: # not a function, but the name was held over if issubclass(type(obj), type): # thanks to Tom Stepleton pointing out pickler._session unneeded _t = 'T2' @@ -1652,10 +1654,10 @@ def save_type(pickler, obj, postproc_list=None): log.info("# %s" % _t) else: log.info("T4: %s" % obj) - if _byref: - warnings.warn('The byref setting is on, but %s cannot be located.' % (obj,), PicklingWarning) + if incorrectly_named: + warnings.warn('Cannot locate reference to %r.' % (obj,), PicklingWarning) if obj_recursive: - warnings.warn('%s.__dict__ contains %s. Cannot pickle recursive classes.' % (obj_name, obj), PicklingWarning) + warnings.warn('Cannot perfectly pickle %r: %s.%s has recursive self-references that would trigger a RecursionError.' % (obj, obj.__module__, obj_name, PicklingWarning)) #print (obj.__dict__) #print ("%s\n%s" % (type(obj), obj.__name__)) #print ("%s\n%s" % (obj.__bases__, obj.__dict__)) @@ -1704,8 +1706,6 @@ def save_classmethod(pickler, obj): @register(FunctionType) def save_function(pickler, obj): - # obj_recursive = id(obj) in getattr(pickler, '_postproc', ()) - # assert not obj_recursive, '%s has a bizarre structure. Open an issue.' % (obj,) if not _locate_function(obj): #, pickler._session): log.info("F1: %s" % obj) _recurse = getattr(pickler, '_recurse', None) @@ -1748,7 +1748,6 @@ def save_function(pickler, obj): postproc_list.append((dict.update, (globs, globs_copy))) if PY3: - #NOTE: workaround for 'super' (see issue #75) removed in #443 fkwdefaults = getattr(obj, '__kwdefaults__', None) _save_with_postproc(pickler, (_create_function, ( obj.__code__, globs, obj.__name__, obj.__defaults__, From e2a3b9869a6a79d9bbca4b6b7f4f2cde5aec33a7 Mon Sep 17 00:00:00 2001 From: anivegesana Date: Wed, 26 Jan 2022 15:21:14 -0800 Subject: [PATCH 46/46] Remove words "perfectly" and "would" --- dill/_dill.py | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/dill/_dill.py b/dill/_dill.py index 9f92931f..2dac61c6 100644 --- a/dill/_dill.py +++ b/dill/_dill.py @@ -1045,7 +1045,7 @@ def _save_with_postproc(pickler, reduction, is_pickler_dill=None, obj=Getattr.NO # Recursive object not supported. Default to a global instead. if id(obj) in pickler._postproc: name = '%s.%s ' % (obj.__module__, getattr(obj, '__qualname__', obj.__name__)) if hasattr(obj, '__module__') else '' - warnings.warn('Cannot perfectly pickle %r: %shas recursive self-references that would trigger a RecursionError.' % (obj, name, PicklingWarning)) + warnings.warn('Cannot pickle %r: %shas recursive self-references that trigger a RecursionError.' % (obj, name), PicklingWarning) pickler.save_global(obj) return pickler._postproc[id(obj)] = postproc_list @@ -1657,7 +1657,7 @@ def save_type(pickler, obj, postproc_list=None): if incorrectly_named: warnings.warn('Cannot locate reference to %r.' % (obj,), PicklingWarning) if obj_recursive: - warnings.warn('Cannot perfectly pickle %r: %s.%s has recursive self-references that would trigger a RecursionError.' % (obj, obj.__module__, obj_name, PicklingWarning)) + warnings.warn('Cannot pickle %r: %s.%s has recursive self-references that trigger a RecursionError.' % (obj, obj.__module__, obj_name), PicklingWarning) #print (obj.__dict__) #print ("%s\n%s" % (type(obj), obj.__name__)) #print ("%s\n%s" % (obj.__bases__, obj.__dict__)) @@ -1690,18 +1690,18 @@ def save_classmethod(pickler, obj): if isinstance(obj, classmethod): orig_func = getattr(orig_func, im_func) # Unbind - if PY3: - if type(obj.__dict__) is dict: - if obj.__dict__: - state = obj.__dict__ - else: - state = None - else: - state = (None, {'__dict__', obj.__dict__}) - else: - state = None - - pickler.save_reduce(type(obj), (orig_func,), state, obj=obj) + # if PY3: + # if type(obj.__dict__) is dict: + # if obj.__dict__: + # state = obj.__dict__ + # else: + # state = None + # else: + # state = (None, {'__dict__', obj.__dict__}) + # else: + # state = None + + pickler.save_reduce(type(obj), (orig_func,), obj=obj) log.info("# Cm") @register(FunctionType)