Skip to content

Commit

Permalink
Refactor builtin method pickling (#262)
Browse files Browse the repository at this point in the history
  • Loading branch information
pierreglaser authored and ogrisel committed May 20, 2019
1 parent 74d69d7 commit ce87fe4
Show file tree
Hide file tree
Showing 3 changed files with 203 additions and 100 deletions.
6 changes: 6 additions & 0 deletions CHANGES.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,9 @@
1.2.0
=====

- Support pickling of classmethod and staticmethod objects in python2.
  ([issue #262](https://github.com/cloudpipe/cloudpickle/pull/262))

1.1.0
=====

Expand Down
136 changes: 52 additions & 84 deletions cloudpickle/cloudpickle.py
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,16 @@ def _lookup_class_or_track(class_tracker_id, class_def):
_DYNAMIC_CLASS_TRACKER_BY_CLASS[class_def] = class_tracker_id
return class_def

if PY3:
    from pickle import _getattribute
else:
    # pickle._getattribute is a python3 addition and enhancement of getattr,
    # that can handle dotted attribute names. In cloudpickle for python2,
    # handling dotted names is not needed, so we simply define _getattribute as
    # a wrapper around getattr.
    def _getattribute(obj, name):
        """Return ``(getattr(obj, name, None), None)``.

        The result is a 2-tuple so that callers can unpack it the same way
        on both branches (``value, _ = _getattribute(obj, name)``). The first
        item is the attribute, or ``None`` when ``obj`` has no attribute
        called ``name``; the second item is always ``None`` here.
        """
        return getattr(obj, name, None), None


def _make_cell_set_template_code():
"""Get the Python compiler to emit LOAD_FAST(arg); STORE_DEREF
Expand Down Expand Up @@ -244,32 +254,6 @@ def _builtin_type(name):
return getattr(types, name)


def _make__new__factory(type_):
    """Build a zero-argument callable that returns ``type_.__new__``."""
    def _factory():
        # The attribute is looked up each time the factory is called, not
        # captured once when the factory is created.
        return type_.__new__
    return _factory


# NOTE: These need to be module globals so that they're pickleable as globals.
# Each one is a zero-argument callable returning the ``__new__`` of the
# corresponding builtin type.
_get_dict_new = _make__new__factory(dict)
_get_frozenset_new = _make__new__factory(frozenset)
_get_list_new = _make__new__factory(list)
_get_set_new = _make__new__factory(set)
_get_tuple_new = _make__new__factory(tuple)
_get_object_new = _make__new__factory(object)

# Pre-defined set of builtin_function_or_method instances that can be
# serialized.
# Keys are the ``__new__`` of each builtin type; values are the matching
# module-level getters defined just above in this block.
_BUILTIN_TYPE_CONSTRUCTORS = {
    dict.__new__: _get_dict_new,
    frozenset.__new__: _get_frozenset_new,
    set.__new__: _get_set_new,
    list.__new__: _get_list_new,
    tuple.__new__: _get_tuple_new,
    object.__new__: _get_object_new,
}


if sys.version_info < (3, 4): # pragma: no branch
def _walk_global_ops(code):
"""
Expand Down Expand Up @@ -423,28 +407,12 @@ def save_function(self, obj, name=None):
Determines what kind of function obj is (e.g. lambda, defined at
interactive prompt, etc) and handles the pickling appropriately.
"""
try:
should_special_case = obj in _BUILTIN_TYPE_CONSTRUCTORS
except TypeError:
# Methods of builtin types aren't hashable in python 2.
should_special_case = False

if should_special_case:
# We keep a special-cased cache of built-in type constructors at
# global scope, because these functions are structured very
# differently in different python versions and implementations (for
# example, they're instances of types.BuiltinFunctionType in
# CPython, but they're ordinary types.FunctionType instances in
# PyPy).
#
# If the function we've received is in that cache, we just
# serialize it as a lookup into the cache.
return self.save_reduce(_BUILTIN_TYPE_CONSTRUCTORS[obj], (), obj=obj)

write = self.write

if name is None:
name = obj.__name__
name = getattr(obj, '__qualname__', None)
if name is None:
name = getattr(obj, '__name__', None)
try:
# whichmodule() could fail, see
# https://bitbucket.org/gutworth/six/issues/63/importing-six-breaks-pickling
Expand All @@ -462,31 +430,14 @@ def save_function(self, obj, name=None):
themodule = None

try:
lookedup_by_name = getattr(themodule, name, None)
lookedup_by_name, _ = _getattribute(themodule, name)
except Exception:
lookedup_by_name = None

if themodule:
if lookedup_by_name is obj:
return self.save_global(obj, name)

# a builtin_function_or_method which comes in as an attribute of some
# object (e.g., itertools.chain.from_iterable) will end
# up with modname "__main__" and so end up here. But these functions
# have no __code__ attribute in CPython, so the handling for
# user-defined functions below will fail.
# So we pickle them here using save_reduce; have to do it differently
# for different python versions.
if not hasattr(obj, '__code__'):
if PY3: # pragma: no branch
rv = obj.__reduce_ex__(self.proto)
else:
if hasattr(obj, '__self__'):
rv = (getattr, (obj.__self__, name))
else:
raise pickle.PicklingError("Can't pickle %r" % obj)
return self.save_reduce(obj=obj, *rv)

# if func is lambda, def'ed at prompt, is in main, or is nested, then
# we'll pickle the actual function object rather than simply saving a
# reference (as is done in default pickler), via save_function_tuple.
Expand Down Expand Up @@ -813,12 +764,44 @@ def extract_func_data(self, func):

return (code, f_globals, defaults, closure, dct, base_globals)

def save_builtin_function(self, obj):
    """Pickle a builtin function.

    Objects whose ``__module__`` is ``"__builtin__"`` are handed to
    ``save_global``; everything else is delegated to ``save_function``.
    """
    if obj.__module__ != "__builtin__":
        return self.save_function(obj)
    return self.save_global(obj)

dispatch[types.BuiltinFunctionType] = save_builtin_function
if not PY3: # pragma: no branch
# Python3 comes with native reducers that allow builtin functions and
# methods pickling as module/class attributes. The following method
# extends this for python2.
# Please note that currently, neither pickle nor cloudpickle support
# dynamically created builtin functions/method pickling.
def save_builtin_function_or_method(self, obj):
    """Pickle a builtin function or method by attribute lookup.

    Three cases are distinguished, in this order:

    * ``obj.__self__`` exists and is not None: ``obj`` is treated as a
      bound builtin method and reduced to ``getattr(obj.__self__, name)``.
    * ``obj`` has an ``__objclass__``: ``obj`` is treated as an unbound
      builtin method and reduced to ``getattr(obj.__objclass__, name)``.
    * otherwise ``obj`` is treated as a plain function and handed to
      ``Pickler.save_global``.
    """
    owner = getattr(obj, '__self__', None)
    if owner is not None:
        # Bound builtin method: rebuild it from the object it is bound to.
        return self.save_reduce(getattr, (owner, obj.__name__), obj=obj)

    if hasattr(obj, '__objclass__'):
        # Unbound builtin method (accessed from its class): rebuild it
        # from the class that defines it.
        return self.save_reduce(
            getattr, (obj.__objclass__, obj.__name__), obj=obj)

    # Not a method but a function: fall back to default pickling by
    # attribute.
    return Pickler.save_global(self, obj)

dispatch[types.BuiltinFunctionType] = save_builtin_function_or_method

# A comprehensive summary of the various kinds of builtin methods can
# be found in PEP 579: https://www.python.org/dev/peps/pep-0579/
classmethod_descriptor_type = type(float.__dict__['fromhex'])
wrapper_descriptor_type = type(float.__repr__)
method_wrapper_type = type(1.5.__repr__)

dispatch[classmethod_descriptor_type] = save_builtin_function_or_method
dispatch[wrapper_descriptor_type] = save_builtin_function_or_method
dispatch[method_wrapper_type] = save_builtin_function_or_method

if sys.version_info[:2] < (3, 4):
method_descriptor = type(str.upper)
dispatch[method_descriptor] = save_builtin_function_or_method

def save_global(self, obj, name=None, pack=struct.pack):
"""
Expand Down Expand Up @@ -1345,18 +1328,3 @@ def _is_dynamic(module):
except ImportError:
return True
return False


""" Use copy_reg to extend global pickle definitions """

if sys.version_info < (3, 4): # pragma: no branch
method_descriptor = type(str.upper)

def _reduce_method_descriptor(obj):
return (getattr, (obj.__objclass__, obj.__name__))

try:
import copy_reg as copyreg
except ImportError:
import copyreg
copyreg.pickle(method_descriptor, _reduce_method_descriptor)
161 changes: 145 additions & 16 deletions tests/cloudpickle_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -641,14 +641,151 @@ def test_NotImplementedType(self):
res = pickle_depickle(type(NotImplemented), protocol=self.protocol)
self.assertEqual(type(NotImplemented), res)

def test_builtin_function_without_module(self):
    # Round-trip two builtin callables that are reached as attributes of a
    # type, and check that the restored callables still behave the same.
    proto = self.protocol

    restored_new = pickle_depickle(object.__new__, protocol=proto)
    self.assertEqual(type(restored_new(object)), type(object()))

    restored_flatten = pickle_depickle(
        itertools.chain.from_iterable, protocol=proto)
    flattened = list(restored_flatten([[1, 2], [3, 4]]))
    self.assertEqual(flattened, [1, 2, 3, 4])
def test_builtin_function(self):
    # Note that builtin_function_or_method are special-cased by cloudpickle
    # only in python2.

    # builtin function from the __builtin__ module
    assert pickle_depickle(zip, protocol=self.protocol) is zip

    # builtin function from a "regular" module. os.mkdir is used instead of
    # sys.getcheckinterval because the latter was removed in Python 3.9,
    # which made this test fail at import time on recent interpreters.
    from os import mkdir
    assert pickle_depickle(mkdir, protocol=self.protocol) is mkdir

@pytest.mark.skipif(
    platform.python_implementation() == 'PyPy'
    and sys.version_info[:2] == (3, 5),
    reason="bug of pypy3.5 in builtin-type constructors")
def test_builtin_type_constructor(self):
    # Cloudpickling builtin-type constructors fails on pypy3.5 because of
    # an interpreter bug (hence the skip above). On every other python
    # version/implementation the round-tripped __new__ must still build
    # instances of its type.
    builtin_types = (list, tuple, set, frozenset, dict, object)
    for builtin_type in builtin_types:
        restored_new = pickle_depickle(
            builtin_type.__new__, protocol=self.protocol)
        instance = restored_new(builtin_type)
        assert isinstance(instance, builtin_type)

# The next 4 tests cover all cases in which builtin python methods can
# appear.
# There are 4 kinds of methods: 'classic' methods, classmethods,
# staticmethods and slotmethods. They will appear under different types
# depending on whether they are accessed from the __dict__ of their
# class, their class itself, or an instance of their class. This makes
# 12 total combinations.
# This discussion and the following tests are relevant for the CPython
# implementation only. In PyPy, there are no builtin method or builtin
# function types/flavours. The only way in which a builtin method can be
# identified is with its builtin-code __code__ attribute.

def test_builtin_classicmethod(self):
    # Round-trip float.hex reached from an instance, from the class and
    # from the class __dict__.
    def roundtrip(method):
        return pickle_depickle(method, protocol=self.protocol)

    value = 1.5  # float object
    float_type = type(value)

    from_instance = value.hex  # builtin_function_or_method
    from_class = float_type.hex  # method_descriptor
    from_clsdict = float_type.__dict__['hex']  # method_descriptor

    assert from_class is from_clsdict

    restored_from_instance = roundtrip(from_instance)
    restored_from_class = roundtrip(from_class)
    restored_from_clsdict = roundtrip(from_clsdict)

    # The bound methods are bound to different float instances, so only
    # their results are compared, not their identity.
    assert restored_from_instance() == from_instance()
    assert restored_from_class is from_class
    assert restored_from_clsdict is from_clsdict


def test_builtin_classmethod(self):
    # Round-trip float.fromhex reached from an instance, from the class
    # and from the class __dict__.
    def roundtrip(method):
        return pickle_depickle(method, protocol=self.protocol)

    value = 1.5  # float object
    float_type = type(value)

    from_instance = value.fromhex  # builtin_function_or_method
    from_class = float_type.fromhex  # builtin_function_or_method
    from_clsdict = float_type.__dict__['fromhex']  # classmethod_descriptor

    restored_from_instance = roundtrip(from_instance)
    restored_from_class = roundtrip(from_class)
    restored_from_clsdict = roundtrip(from_clsdict)

    # float.fromhex takes a string as input.
    hex_text = "0x1"

    # Identity cannot be tested for either the bound or the unbound
    # methods: the bound methods are bound to different objects, and the
    # unbound methods are recreated at each access.
    assert restored_from_instance(hex_text) == from_instance(hex_text)
    assert restored_from_class(hex_text) == from_class(hex_text)

    implementation = platform.python_implementation()
    if implementation == 'CPython':
        # Roundtripping a classmethod_descriptor results in a
        # builtin_function_or_method (CPython upstream issue).
        expected = from_clsdict(float, hex_text)
        assert restored_from_clsdict(hex_text) == expected
    elif implementation == 'PyPy':
        # builtin-classmethods are simple classmethods in PyPy (not
        # callable). Compare the types and the behaviour of the __func__
        # attribute instead. Identity of the functions is not tested, as
        # __func__ attributes of classmethods are not pickleable and must
        # be reconstructed at depickling time.
        assert type(restored_from_clsdict) == type(from_clsdict)
        restored_result = restored_from_clsdict.__func__(float, hex_text)
        original_result = from_clsdict.__func__(float, hex_text)
        assert restored_result == original_result

def test_builtin_slotmethod(self):
    # Round-trip float.__repr__ reached from an instance, from the class
    # and from the class __dict__.
    def roundtrip(method):
        return pickle_depickle(method, protocol=self.protocol)

    value = 1.5  # float object
    float_type = type(value)

    from_instance = value.__repr__  # method-wrapper
    from_class = float_type.__repr__  # wrapper_descriptor
    from_clsdict = float_type.__dict__['__repr__']  # wrapper_descriptor

    restored_from_instance = roundtrip(from_instance)
    restored_from_class = roundtrip(from_class)
    restored_from_clsdict = roundtrip(from_clsdict)

    # The bound slotmethods are bound to different float instances, so
    # only their results are compared, not their identity.
    assert restored_from_instance() == from_instance()
    assert restored_from_class is from_class
    assert restored_from_clsdict is from_clsdict

@pytest.mark.skipif(
    platform.python_implementation() == "PyPy" or
    sys.version_info[:1] < (3,),
    reason="No known staticmethod example in the python 2 / pypy stdlib")
def test_builtin_staticmethod(self):
    # Round-trip str.maketrans reached from an instance, from the class
    # and from the class __dict__.
    obj = "foo"  # str object

    bound_staticmethod = obj.maketrans  # builtin_function_or_method
    unbound_staticmethod = type(obj).maketrans  # ditto
    clsdict_staticmethod = type(obj).__dict__['maketrans']  # staticmethod

    assert bound_staticmethod is unbound_staticmethod

    depickled_bound_meth = pickle_depickle(
        bound_staticmethod, protocol=self.protocol)
    depickled_unbound_meth = pickle_depickle(
        unbound_staticmethod, protocol=self.protocol)
    depickled_clsdict_meth = pickle_depickle(
        clsdict_staticmethod, protocol=self.protocol)

    assert depickled_bound_meth is bound_staticmethod
    assert depickled_unbound_meth is unbound_staticmethod

    # staticmethod objects are recreated at depickling time, but the
    # underlying __func__ object is pickled by attribute.
    assert depickled_clsdict_meth.__func__ is clsdict_staticmethod.__func__
    # This comparison used to be a bare expression, so it was evaluated
    # and discarded; it has to be asserted to check anything.
    assert type(depickled_clsdict_meth) is type(clsdict_staticmethod)

@pytest.mark.skipif(tornado is None,
reason="test needs Tornado installed")
Expand Down Expand Up @@ -964,14 +1101,6 @@ def test_namedtuple(self):
assert isinstance(depickled_t2, MyTuple)
assert depickled_t2 == t2

def test_builtin_type__new__(self):
    # Functions occasionally take the __new__ of these types as default
    # parameters for factories. For example, on Python 3.3,
    # `tuple.__new__` is a default value for some methods of namedtuple.
    builtin_types = (list, tuple, set, frozenset, dict, object)
    for builtin_type in builtin_types:
        restored_new = pickle_depickle(
            builtin_type.__new__, protocol=self.protocol)
        self.assertTrue(restored_new is builtin_type.__new__)

def test_interactively_defined_function(self):
# Check that callables defined in the __main__ module of a Python
# script (or jupyter kernel) can be pickled / unpickled / executed.
Expand Down

0 comments on commit ce87fe4

Please sign in to comment.