Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Allow declaring weak properties #211

Merged
merged 16 commits into from
Jul 23, 2021
1 change: 1 addition & 0 deletions changes/210.feature.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Added support for declaring weak properties on custom Objective-C classes.
66 changes: 58 additions & 8 deletions rubicon/objc/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -368,26 +368,36 @@ class MySubclass(NSObject):
the generated setter keeps the stored object retained, and releases it when it is replaced.

In a custom Objective-C protocol, only the metadata for the property is generated.

If ``weak`` is ``True``, the property will be created as a weak property. When assigning an object to it,
the reference count of the object will not be increased. When the object is deallocated, the property
value is set to None.
"""

def __init__(self, vartype=objc_id):
def __init__(self, vartype=objc_id, weak=False):
super().__init__()

self.vartype = ctype_for_type(vartype)
self.weak = weak

def _get_property_attributes(self):
attrs = [
objc_property_attribute_t(b'T', encoding_for_ctype(self.vartype)), # Type: vartype
]
if issubclass(self.vartype, objc_id):
attrs.append(objc_property_attribute_t(b'&', b'')) # retain
reference = b'W' if self.weak else b'&'
attrs.append(objc_property_attribute_t(reference, b''))
return (objc_property_attribute_t * len(attrs))(*attrs)

def class_register(self, class_ptr, attr_name):
add_ivar(class_ptr, '_' + attr_name, self.vartype)

ivar_name = '_' + attr_name

add_ivar(class_ptr, ivar_name, self.vartype)

def _objc_getter(objc_self, _cmd):
value = get_ivar(objc_self, '_' + attr_name)
value = get_ivar(objc_self, ivar_name, weak=self.weak)

# ctypes complains when a callback returns a "boxed" primitive type, so we have to manually unbox it.
# If the data object has a value attribute and is not a structure or union, assume that it is
# a primitive and unbox it.
Expand All @@ -403,12 +413,21 @@ def _objc_setter(objc_self, _cmd, new_value):
if not isinstance(new_value, self.vartype):
# If vartype is a primitive, then new_value may be unboxed. If that is the case, box it manually.
new_value = self.vartype(new_value)
old_value = get_ivar(objc_self, '_' + attr_name)
if issubclass(self.vartype, objc_id) and new_value:

if issubclass(self.vartype, objc_id) and not self.weak:
old_value = get_ivar(objc_self, ivar_name, weak=self.weak)

if new_value.value == old_value.value:
# old and new value are the same, nothing to do
return

if not self.weak and issubclass(self.vartype, objc_id) and new_value:
# If the new value is a non-null object, retain it.
send_message(new_value, 'retain', restype=objc_id, argtypes=[])
set_ivar(objc_self, '_' + attr_name, new_value)
if issubclass(self.vartype, objc_id) and old_value:

set_ivar(objc_self, ivar_name, new_value, weak=self.weak)

if not self.weak and issubclass(self.vartype, objc_id) and old_value:
# If the old value is a non-null object, release it.
send_message(old_value, 'release', restype=None, argtypes=[])

Expand All @@ -426,6 +445,19 @@ def _objc_setter(objc_self, _cmd, new_value):
attrs = self._get_property_attributes()
libobjc.class_addProperty(class_ptr, ensure_bytes(attr_name), attrs, len(attrs))

def dealloc_callback(self, objc_self, attr_name):
    """Clean up the ivar backing this property while ``objc_self`` is being deallocated.

    :param objc_self: The Objective-C instance that is being deallocated.
    :param attr_name: The name the property was declared under. The backing ivar is
        named ``'_' + attr_name``.
    """
    ivar_name = '_' + attr_name

    if self.weak:
        # Clear the weak reference by storing a null value through the weak-store path.
        set_ivar(objc_self, ivar_name, self.vartype(None), weak=True)
    elif issubclass(self.vartype, objc_id):
        # Strong object property: the setter retained the stored object, so balance
        # that retain here. There is no need to set the actual ivar to nil.
        old_value = get_ivar(objc_self, ivar_name, weak=False)
        if old_value:
            # Only a non-null object is released, mirroring the guard used by the setter.
            send_message(old_value, 'release', restype=None, argtypes=[])

def protocol_register(self, proto_ptr, attr_name):
    """Add this property's metadata to the Objective-C protocol ``proto_ptr``.

    :param proto_ptr: The protocol that the property is added to.
    :param attr_name: The name of the property.
    """
    property_attrs = self._get_property_attributes()
    libobjc.protocol_addProperty(
        proto_ptr,
        ensure_bytes(attr_name),
        property_attrs,
        len(property_attrs),
        True,  # isRequiredProperty
        True,  # isInstanceProperty
    )
Expand Down Expand Up @@ -968,6 +1000,24 @@ def _new_from_class_statement(cls, name, bases, attrs, *, protocols):
else:
class_register(ptr, attr_name)

# Add cleanup of ivars / properties to dealloc

old_dealloc = libobjc.class_getMethodImplementation(ptr, SEL("dealloc"))

def _new_delloc(objc_self, _cmd):
    """Generated ``dealloc``: run every property's cleanup, then the previous implementation.

    ``attrs`` and ``old_dealloc`` are captured from the enclosing scope.
    """
    # Let each declared objc_property clean up its own ivar first.
    declared_properties = (
        (name, value)
        for name, value in attrs.items()
        if isinstance(value, objc_property)
    )
    for name, prop in declared_properties:
        prop.dealloc_callback(objc_self, name)

    # Then chain to the dealloc implementation that was looked up beforehand.
    previous_dealloc = cast(old_dealloc, CFUNCTYPE(None, objc_id, SEL))
    previous_dealloc(objc_self, SEL("dealloc"))

add_method(ptr, "dealloc", _new_delloc, [None, ObjCInstance, SEL], replace=True)
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What behavior do we want if a class contains both a user-defined dealloc method and properties with dealloc callbacks? The implementation here will call the dealloc callbacks before the user-defined dealloc method. But in practice I think the opposite order would be more useful - that way the user-defined dealloc could call methods on objects stored in properties before they are released by the callbacks (which could cause them to be deallocated right away).

Implementing this would be a bit more difficult. A user-defined dealloc is expected to end with a call to the superclass dealloc. But if a class has dealloc callbacks, those would need to run before the superclass dealloc (but also after the user-defined dealloc code). There's no way for Rubicon to insert the dealloc callbacks into the user's dealloc method, so the user would have to manually call the dealloc callbacks before the super call (via an extra function/method provided by Rubicon).

A different solution would be to allow users to define their own dealloc callback that gets called before any dealloc callbacks from properties. Then users could use that callback instead of manually overriding dealloc, and let Rubicon generate a dealloc method that calls all callbacks in the right order.

Copy link
Member Author

@samschott samschott Jul 10, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I agree that calling the user's dealloc before clearing any instance variables can make life a lot easier for the user. But I don't really like either of the proposed solutions; both look like workarounds that force the user to become aware of the memory management which we are otherwise performing behind the scenes.

I can possibly think of two alternative approaches that could provide a better user experience (but would complicate our own implementation):

  1. We document that users should not call the dealloc of the super class in their own implementation, as in a proper ARC environment. We then call the following methods in order: first, any user-defined dealloc; second, our cleanup code; and third, the super class's dealloc. This would mean treating the dealloc definition differently from other method definitions, at least internally. It would however be transparent to the user.

  2. We call our own cleanup code after calling the old dealloc implementation (which includes the user's code and any calls to the super dealloc). I'm not sure if this is possible to do reliably.

What do you think, is either of those options worth the effort?

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Option 1 would be ideal, because it's simple to use and matches what you do in normal ARC Objective-C code. But implementing this behavior now would silently break existing code that overrides dealloc and correctly calls the superclass dealloc at the end - that would cause the superclass dealloc to get called twice, which can lead to other objects being released too often and other similar problems. That's the main reason why I suggested adding a separate user-definable callback for the same purpose.

Rubicon is still before version 1.0, so we could still make breaking changes like this, especially if it improves usability in the long term. But if we do that, we should try to throw errors for code relying on the old behavior, to avoid silent double frees in code that was previously correct.

I honestly don't know if option 2 would be safe or not... There should be no way for the superclass deallocs to corrupt ivars defined by the subclass, but I wouldn't really rely on it. Especially NSObject's top-level dealloc could do things that make the entire object unusable somehow.

Another alternative would be to not touch user-defined deallocs at all, and only generate an automatic dealloc if the user hasn't already overridden dealloc manually. This would be fully compatible with existing code and should also be simple to implement. The disadvantage is that if the user really needs to add custom code to dealloc, they then have to manually do all the cleanup that the dealloc callbacks would have done automatically.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I do prefer Option 1 from those choices, or not touching user-defined deallocs at all together with clear documentation on manual cleanup. My only issue with Option 1 is, how can we raise an error if the user does call the superclass dealloc? Is there an elegant way of doing so? We are not a compiler after all and don't want to inspect the actual code in the user's dealloc.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I can confirm that running our cleanup after the superclass dealloc does lead to trouble during the ivar cleanup. In particular, libobjc.class_getInstanceVariable() returns None. So this is not viable.

I've implemented the first option now: calling the user's dealloc, then our cleanup, then the superclass dealloc. For the time being, there is no special error handling if the user calls the superclass dealloc manually. It will however raise (obscure) errors when we run our own cleanup. Without our own cleanup code (for example when no properties are declared), dealloc is called twice and leads to a segfault. In either case, users will notice that something is wrong without knowing what it might be. Not ideal...

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The best workaround I can come up with, short of actually inspecting the user's dealloc code when creating the class, is to print a warning when send_super is called, before either segfaulting or failing to complete the dealloc. What do you think, is this acceptable?

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sorry for the late reply - had some uni stuff that I needed to finish in the last few days.

The implementation you wrote looks good IMO. You're right that we can't do much to guard against old code that still calls send_super at the end of dealloc. The only option I can think of is what you've already suggested - add a special case inside send_super that warns whenever send_super(self, "dealloc", ...) is called. If we do that, we just need to make sure that the warning doesn't appear when Rubicon itself calls send_super(self, "dealloc", ...) - probably using an internal keyword to suppress the warning?

That way I think we could even "fix" the segfaults. If send_super(self, "dealloc", ...) is called without the special internal keyword argument, we can make it show a warning and then return without actually calling the super method. That way, if a user-defined dealloc calls send_super, the super dealloc won't actually be called yet. Then the user-defined dealloc returns to Rubicon, which runs the dealloc callbacks then makes the real send_super call (with the internal keyword argument, so that this time it actually calls the super method).

This isn't a very nice solution - but if it works, I would rather have some less nice code in Rubicon that shows a helpful warning about the change, rather than breaking existing code so that it causes unexplained segfaults.


# Register the ObjC class
libobjc.objc_registerClassPair(ptr)

Expand Down
42 changes: 34 additions & 8 deletions rubicon/objc/runtime.py
Original file line number Diff line number Diff line change
Expand Up @@ -301,7 +301,7 @@ class objc_property_attribute_t(Structure):

# IMP class_replaceMethod(Class cls, SEL name, IMP imp, const char *types)
libobjc.class_replaceMethod.restype = IMP
libobjc.class_replaceMethod.argtypes = [Class, SEL, Ivar, c_char_p]
libobjc.class_replaceMethod.argtypes = [Class, SEL, IMP, c_char_p]

# BOOL class_respondsToSelector(Class cls, SEL sel)
libobjc.class_respondsToSelector.restype = c_bool
Expand Down Expand Up @@ -361,6 +361,10 @@ class objc_property_attribute_t(Structure):
libobjc.objc_allocateClassPair.restype = Class
libobjc.objc_allocateClassPair.argtypes = [Class, c_char_p, c_size_t]

# id objc_autoreleaseReturnValue(id value)
libobjc.objc_autoreleaseReturnValue.restype = objc_id
libobjc.objc_autoreleaseReturnValue.argtypes = [objc_id]

# Protocol **objc_copyProtocolList(unsigned int *outCount)
# Returns an array of *outcount pointers followed by NULL terminator.
# You must free() the array.
Expand All @@ -383,6 +387,14 @@ class objc_property_attribute_t(Structure):
libobjc.objc_getProtocol.restype = objc_id
libobjc.objc_getProtocol.argtypes = [c_char_p]

# id objc_loadWeakRetained(id *object)
libobjc.objc_loadWeakRetained.restype = objc_id
libobjc.objc_loadWeakRetained.argtypes = [c_void_p]

# id objc_storeWeak(id *object, id value)
libobjc.objc_storeWeak.restype = objc_id
libobjc.objc_storeWeak.argtypes = [c_void_p, objc_id]

# You should set return and argument types depending on context.
# id objc_msgSend(id theReceiver, SEL theSelector, ...)
# id objc_msgSendSuper(struct objc_super *super, SEL op, ...)
Expand Down Expand Up @@ -846,7 +858,7 @@ def send_super(cls, receiver, selector, *args, restype=c_void_p, argtypes=None):
_keep_alive_imps = []


def add_method(cls, selector, method, encoding):
def add_method(cls, selector, method, encoding, replace=False):
"""Add a new instance method to the given class.

To add a class method, add an instance method to the metaclass.
Expand All @@ -856,6 +868,8 @@ def add_method(cls, selector, method, encoding):
:param method: The method implementation, as a Python callable or a C function address.
:param encoding: The method's signature (return type and argument types) as a :class:`list`.
The types of the implicit ``self`` and ``_cmd`` parameters must be included in the signature.
:param replace: If the class already implements a method with the given name, replaces the current implementation
if ``True``. Raises a :class:`ValueError` error otherwise.
:return: The ctypes C function pointer object that was created for the method's implementation.
This return value can be ignored. (In version 0.4.0 and older, callers were required to manually
keep a reference to this function pointer object to ensure that it isn't garbage-collected.
Expand All @@ -874,7 +888,14 @@ def add_method(cls, selector, method, encoding):

cfunctype = CFUNCTYPE(*signature)
imp = cfunctype(method)
libobjc.class_addMethod(cls, selector, cast(imp, IMP), types)
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That said, it looks like our existing add_method implementation doesn't check the return value of class_addMethod, so the method would silently do nothing if a conflicting method already exists. So if we keep add_method we should also fix it to raise an error if class_addMethod returns false.

if replace:
libobjc.class_replaceMethod(cls, selector, cast(imp, IMP), types)
else:
res = libobjc.class_addMethod(cls, selector, cast(imp, IMP), types)

if not res:
raise ValueError("A method with the name {!r} already exists".format(selector.name))

_keep_alive_imps.append(imp)
return imp

Expand All @@ -888,7 +909,7 @@ def add_ivar(cls, name, vartype):
)


def get_ivar(obj, varname):
def get_ivar(obj, varname, weak=False):
"""Get the value of obj's ivar named varname.

The returned object is a :mod:`ctypes` data object.
Expand All @@ -909,14 +930,17 @@ def get_ivar(obj, varname):
ivar = libobjc.class_getInstanceVariable(libobjc.object_getClass(obj), ensure_bytes(varname))
vartype = ctype_for_encoding(libobjc.ivar_getTypeEncoding(ivar))

if isinstance(vartype, objc_id):
if weak:
value = libobjc.objc_loadWeakRetained(obj.value + libobjc.ivar_getOffset(ivar))
return libobjc.objc_autoreleaseReturnValue(value)
elif issubclass(vartype, objc_id):
return cast(libobjc.object_getIvar(obj, ivar), vartype)
Copy link
Member Author

@samschott samschott Jun 30, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

When is this cast required? Isn't it sufficient to return an objc_id which later gets converted to the correct ObjCInstance?

If it is required, do we need to add a similar cast before returning the value in the weak case?

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You're right that libobjc.object_getIvar already returns an objc_id. The most common case is that vartype is exactly objc_id, in which case the cast indeed does nothing. I think it's only needed for the less common case where vartype is Class (which we have declared as a subclass of objc_id) so that the return value is cast to Class instead of a plain objc_id.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah, ok! So we should perform a similar cast when returning a value in the weak case as well.

else:
return vartype.from_address(obj.value + libobjc.ivar_getOffset(ivar))


def set_ivar(obj, varname, value):
"""Set obj's ivar varname to value.
def set_ivar(obj, varname, value, weak=False):
"""Set obj's ivar varname to value. If ``weak`` is ``True``, only a weak reference to the value is stored.

value must be a :mod:`ctypes` data object whose type matches that of the ivar.
"""
Expand All @@ -940,7 +964,9 @@ def set_ivar(obj, varname, value):
.format(varname, type(value), sizeof(type(value)), vartype, sizeof(vartype))
)

if isinstance(vartype, objc_id):
if weak:
libobjc.objc_storeWeak(obj.value + libobjc.ivar_getOffset(ivar), value)
elif issubclass(vartype, objc_id):
libobjc.object_setIvar(obj, ivar, value)
else:
memmove(obj.value + libobjc.ivar_getOffset(ivar), addressof(value), sizeof(vartype))
41 changes: 41 additions & 0 deletions tests/test_core.py
Original file line number Diff line number Diff line change
Expand Up @@ -1003,6 +1003,47 @@ class Properties(NSObject):
self.assertEqual(r.size.width, 56)
self.assertEqual(r.size.height, 78)

def test_class_properties_lifecycle_strong(self):
    """A strong object property still holds its object after all other references are dropped."""

    class StrongProperties(NSObject):
        object = objc_property(ObjCInstance)

    pool = NSAutoreleasePool.alloc().init()

    holder = StrongProperties.alloc().init()

    target = NSObject.alloc().init()
    # Remember the raw pointer now; the Python wrapper is deleted below.
    target_address = target.ptr.value

    holder.object = target

    # Drop the Python reference and the autorelease pool, then force a collection.
    del target
    del pool
    gc.collect()

    # The property retained the object, so it still refers to the same address.
    self.assertEqual(holder.object.ptr.value, target_address)

def test_class_properties_lifecycle_weak(self):
    """A weak object property reads back as None once its object has been deallocated."""

    class WeakProperties(NSObject):
        object = objc_property(ObjCInstance, weak=True)

    pool = NSAutoreleasePool.alloc().init()

    holder = WeakProperties.alloc().init()

    target = NSObject.alloc().init()
    holder.object = target

    # While the object is alive, the property returns that same object.
    self.assertIs(holder.object, target)

    # Drop the Python reference and the autorelease pool, then force a collection.
    del target
    del pool
    gc.collect()

    # Nothing retains the object any more, so the weak property has been cleared.
    self.assertIsNone(holder.object)

def test_class_with_wrapped_methods(self):
"""An ObjCClass can have wrapped methods."""

Expand Down