From 3597c129413a86f805beca78b7c72a20b5bf801c Mon Sep 17 00:00:00 2001 From: Petr Viktorin Date: Tue, 14 Jun 2022 11:13:29 +0200 Subject: [PATCH] gh-89546: Clean up PyType_FromMetaclass (GH-93686) When changing PyType_FromMetaclass recently (GH-93012, GH-93466, GH-28748) I found a bunch of opportunities to improve the code. Here they are. Fixes: #89546 Automerge-Triggered-By: GH:encukou --- ...2-06-10-16-50-27.gh-issue-89546.mX1f10.rst | 4 + Modules/_testcapimodule.c | 3 +- Objects/typeobject.c | 276 ++++++++++++------ 3 files changed, 192 insertions(+), 91 deletions(-) create mode 100644 Misc/NEWS.d/next/C API/2022-06-10-16-50-27.gh-issue-89546.mX1f10.rst diff --git a/Misc/NEWS.d/next/C API/2022-06-10-16-50-27.gh-issue-89546.mX1f10.rst b/Misc/NEWS.d/next/C API/2022-06-10-16-50-27.gh-issue-89546.mX1f10.rst new file mode 100644 index 00000000000000..8e6b6d95c7b4ab --- /dev/null +++ b/Misc/NEWS.d/next/C API/2022-06-10-16-50-27.gh-issue-89546.mX1f10.rst @@ -0,0 +1,4 @@ +:c:func:`PyType_FromMetaclass` (and other ``PyType_From*`` functions) now +check that offsets and the base class's +:c:member:`~PyTypeObject.tp_basicsize` fit in the new class's +``tp_basicsize``. diff --git a/Modules/_testcapimodule.c b/Modules/_testcapimodule.c index 33dc3dbe493c51..54957db89663e0 100644 --- a/Modules/_testcapimodule.c +++ b/Modules/_testcapimodule.c @@ -1221,7 +1221,7 @@ static PyType_Spec MinimalMetaclass_spec = { static PyType_Spec MinimalType_spec = { .name = "_testcapi.MinimalSpecType", - .basicsize = sizeof(PyObject), + .basicsize = 0, // Updated later .flags = Py_TPFLAGS_DEFAULT, .slots = empty_type_slots, }; @@ -1245,6 +1245,7 @@ test_from_spec_metatype_inheritance(PyObject *self, PyObject *Py_UNUSED(ignored) goto finally; } + MinimalType_spec.basicsize = (int)(((PyTypeObject*)class)->tp_basicsize); new = PyType_FromSpecWithBases(&MinimalType_spec, class); if (new == NULL) { goto finally; diff --git a/Objects/typeobject.c b/Objects/typeobject.c index c6df50da9736bb..db4682c69ed0e3 100644 --- a/Objects/typeobject.c +++ b/Objects/typeobject.c @@ -3398,30 +3398,87 @@ get_bases_tuple(PyObject *bases_in, PyType_Spec *spec) return PyTuple_Pack(1, bases_in); } +static inline int +check_basicsize_includes_size_and_offsets(PyTypeObject* type) +{ + if (type->tp_alloc != PyType_GenericAlloc) { + // Custom allocators can ignore tp_basicsize + return 1; + } + Py_ssize_t max = (Py_ssize_t)type->tp_basicsize; + + if (type->tp_base && type->tp_base->tp_basicsize > type->tp_basicsize) { + PyErr_Format(PyExc_TypeError, + "tp_basicsize for type '%s' (%d) is too small for base '%s' (%d)", + type->tp_name, type->tp_basicsize, + type->tp_base->tp_name, type->tp_base->tp_basicsize); + return 0; + } + if (type->tp_weaklistoffset + (Py_ssize_t)sizeof(PyObject*) > max) { + PyErr_Format(PyExc_TypeError, + "weaklist offset %d is out of bounds for type '%s' (tp_basicsize = %d)", + type->tp_weaklistoffset, + type->tp_name, type->tp_basicsize); + return 0; + } + if (type->tp_dictoffset + (Py_ssize_t)sizeof(PyObject*) > max) { + PyErr_Format(PyExc_TypeError, + "dict offset %d is out of bounds for type '%s' (tp_basicsize = %d)", + type->tp_dictoffset, + type->tp_name, type->tp_basicsize); + return 0; + } + if (type->tp_vectorcall_offset + (Py_ssize_t)sizeof(vectorcallfunc*) > max) { + PyErr_Format(PyExc_TypeError, + "vectorcall offset %d is out of bounds for type '%s' (tp_basicsize = %d)", + type->tp_vectorcall_offset, + type->tp_name, type->tp_basicsize); + return 0; + } + return 1; +} + PyObject * PyType_FromMetaclass(PyTypeObject *metaclass, PyObject *module, PyType_Spec *spec, PyObject *bases_in) { + /* Invariant: A non-NULL value in one of these means this function holds + * a strong reference or owns allocated memory. + * These get decrefed/freed/returned at the end, on both success and error. + */ PyHeapTypeObject *res = NULL; - PyObject *modname = NULL; PyTypeObject *type; PyObject *bases = NULL; + char *tp_doc = NULL; + PyObject *ht_name = NULL; + char *_ht_tpname = NULL; + int r; + /* Prepare slots that need special handling. + * Keep in mind that a slot can be given multiple times: + * if that would cause trouble (leaks, UB, ...), raise an exception. + */ + const PyType_Slot *slot; Py_ssize_t nmembers = 0; Py_ssize_t weaklistoffset, dictoffset, vectorcalloffset; char *res_start; - short slot_offset, subslot_offset; nmembers = weaklistoffset = dictoffset = vectorcalloffset = 0; for (slot = spec->slots; slot->slot; slot++) { - if (slot->slot == Py_tp_members) { + if (slot->slot < 0 + || (size_t)slot->slot >= Py_ARRAY_LENGTH(pyslot_offsets)) { + PyErr_SetString(PyExc_RuntimeError, "invalid slot offset"); + goto finally; + } + switch (slot->slot) { + case Py_tp_members: if (nmembers != 0) { PyErr_SetString( PyExc_SystemError, "Multiple Py_tp_members slots are not supported."); - return NULL; + goto finally; } for (const PyMemberDef *memb = slot->pfunc; memb->name != NULL; memb++) { nmembers++; @@ -3444,15 +3501,70 @@ PyType_FromMetaclass(PyTypeObject *metaclass, PyObject *module, vectorcalloffset = memb->offset; } } + break; + case Py_tp_doc: + /* For the docstring slot, which usually points to a static string + literal, we need to make a copy */ + if (tp_doc != NULL) { + PyErr_SetString( + PyExc_SystemError, + "Multiple Py_tp_doc slots are not supported."); + goto finally; + } + if (slot->pfunc == NULL) { + PyObject_Free(tp_doc); + tp_doc = NULL; + } + else { + size_t len = strlen(slot->pfunc)+1; + tp_doc = PyObject_Malloc(len); + if (tp_doc == NULL) { + PyErr_NoMemory(); + goto finally; + } + memcpy(tp_doc, slot->pfunc, len); + } + break; } } + /* Prepare the type name and qualname */ + if (spec->name == NULL) { PyErr_SetString(PyExc_SystemError, "Type spec does not define the name field."); goto finally; } + const char *s = strrchr(spec->name, '.'); + if (s == NULL) { + s = spec->name; + } + else { + s++; + } + + ht_name = PyUnicode_FromString(s); + if (!ht_name) { + goto finally; + } + + /* Copy spec->name to a buffer we own. + * + * Unfortunately, we can't use tp_name directly (with some + * flag saying that it should be deallocated with the type), + * because tp_name is public API and may be set independently + * of any such flag. + * So, we use a separate buffer, _ht_tpname, that's always + * deallocated with the type (if it's non-NULL). + */ + Py_ssize_t name_buf_len = strlen(spec->name) + 1; + _ht_tpname = PyMem_Malloc(name_buf_len); + if (_ht_tpname == NULL) { + goto finally; + } + memcpy(_ht_tpname, spec->name, name_buf_len); + /* Get a tuple of bases. * bases is a strong reference (unlike bases_in). */ @@ -3491,7 +3603,13 @@ PyType_FromMetaclass(PyTypeObject *metaclass, PyObject *module, // here we just check its work assert(_PyType_HasFeature(base, Py_TPFLAGS_BASETYPE)); - /* Allocate the new type */ + /* Allocate the new type + * + * Between here and PyType_Ready, we should limit: + * - calls to Python code + * - raising exceptions + * - memory allocations + */ res = (PyHeapTypeObject*)metaclass->tp_alloc(metaclass, nmembers); if (res == NULL) { @@ -3503,105 +3621,70 @@ PyType_FromMetaclass(PyTypeObject *metaclass, PyObject *module, /* The flags must be initialized early, before the GC traverses us */ type->tp_flags = spec->flags | Py_TPFLAGS_HEAPTYPE; - /* Set the type name and qualname */ - const char *s = strrchr(spec->name, '.'); - if (s == NULL) { - s = spec->name; - } - else { - s++; - } - - res->ht_name = PyUnicode_FromString(s); - if (!res->ht_name) { - goto finally; - } - res->ht_qualname = Py_NewRef(res->ht_name); - - /* Copy spec->name to a buffer we own. - * - * Unfortunately, we can't use tp_name directly (with some - * flag saying that it should be deallocated with the type), - * because tp_name is public API and may be set independently - * of any such flag. - * So, we use a separate buffer, _ht_tpname, that's always - * deallocated with the type (if it's non-NULL). - */ - Py_ssize_t name_buf_len = strlen(spec->name) + 1; - res->_ht_tpname = PyMem_Malloc(name_buf_len); - if (res->_ht_tpname == NULL) { - goto finally; - } - type->tp_name = memcpy(res->_ht_tpname, spec->name, name_buf_len); - res->ht_module = Py_XNewRef(module); /* Initialize essential fields */ + type->tp_as_async = &res->as_async; type->tp_as_number = &res->as_number; type->tp_as_sequence = &res->as_sequence; type->tp_as_mapping = &res->as_mapping; type->tp_as_buffer = &res->as_buffer; - /* Set tp_base and tp_bases */ + /* Set slots we have prepared */ + type->tp_base = (PyTypeObject *)Py_NewRef(base); type->tp_bases = bases; bases = NULL; // We give our reference to bases to the type + type->tp_doc = tp_doc; + tp_doc = NULL; // Give ownership of the allocated memory to the type + + res->ht_qualname = Py_NewRef(ht_name); + res->ht_name = ht_name; + ht_name = NULL; // Give our reference to to the type + + type->tp_name = _ht_tpname; + res->_ht_tpname = _ht_tpname; + _ht_tpname = NULL; // Give ownership to to the type + /* Copy the sizes */ + type->tp_basicsize = spec->basicsize; type->tp_itemsize = spec->itemsize; + /* Copy all the ordinary slots */ + for (slot = spec->slots; slot->slot; slot++) { - if (slot->slot < 0 - || (size_t)slot->slot >= Py_ARRAY_LENGTH(pyslot_offsets)) { - PyErr_SetString(PyExc_RuntimeError, "invalid slot offset"); - goto finally; - } - else if (slot->slot == Py_tp_base || slot->slot == Py_tp_bases) { + switch (slot->slot) { + case Py_tp_base: + case Py_tp_bases: + case Py_tp_doc: /* Processed above */ - continue; - } - else if (slot->slot == Py_tp_doc) { - /* For the docstring slot, which usually points to a static string - literal, we need to make a copy */ - if (type->tp_doc != NULL) { - PyErr_SetString( - PyExc_SystemError, - "Multiple Py_tp_doc slots are not supported."); - goto finally; - } - if (slot->pfunc == NULL) { - type->tp_doc = NULL; - continue; - } - size_t len = strlen(slot->pfunc)+1; - char *tp_doc = PyObject_Malloc(len); - if (tp_doc == NULL) { - type->tp_doc = NULL; - PyErr_NoMemory(); - goto finally; + break; + case Py_tp_members: + { + /* Move the slots to the heap type itself */ + size_t len = Py_TYPE(type)->tp_itemsize * nmembers; + memcpy(_PyHeapType_GET_MEMBERS(res), slot->pfunc, len); + type->tp_members = _PyHeapType_GET_MEMBERS(res); } - memcpy(tp_doc, slot->pfunc, len); - type->tp_doc = tp_doc; - } - else if (slot->slot == Py_tp_members) { - /* Move the slots to the heap type itself */ - size_t len = Py_TYPE(type)->tp_itemsize * nmembers; - memcpy(_PyHeapType_GET_MEMBERS(res), slot->pfunc, len); - type->tp_members = _PyHeapType_GET_MEMBERS(res); - } - else { - /* Copy other slots directly */ - PySlot_Offset slotoffsets = pyslot_offsets[slot->slot]; - slot_offset = slotoffsets.slot_offset; - if (slotoffsets.subslot_offset == -1) { - *(void**)((char*)res_start + slot_offset) = slot->pfunc; - } else { - void *parent_slot = *(void**)((char*)res_start + slot_offset); - subslot_offset = slotoffsets.subslot_offset; - *(void**)((char*)parent_slot + subslot_offset) = slot->pfunc; + break; + default: + { + /* Copy other slots directly */ + PySlot_Offset slotoffsets = pyslot_offsets[slot->slot]; + short slot_offset = slotoffsets.slot_offset; + if (slotoffsets.subslot_offset == -1) { + *(void**)((char*)res_start + slot_offset) = slot->pfunc; + } + else { + void *procs = *(void**)((char*)res_start + slot_offset); + short subslot_offset = slotoffsets.subslot_offset; + *(void**)((char*)procs + subslot_offset) = slot->pfunc; + } } + break; } } if (type->tp_dealloc == NULL) { @@ -3611,14 +3694,26 @@ PyType_FromMetaclass(PyTypeObject *metaclass, PyObject *module, type->tp_dealloc = subtype_dealloc; } - if (vectorcalloffset) { - type->tp_vectorcall_offset = vectorcalloffset; - } + /* Set up offsets */ + + type->tp_vectorcall_offset = vectorcalloffset; + type->tp_weaklistoffset = weaklistoffset; + type->tp_dictoffset = dictoffset; + + /* Ready the type (which includes inheritance). + * + * After this call we should generally only touch up what's + * accessible to Python code, like __dict__. + */ if (PyType_Ready(type) < 0) { goto finally; } + if (!check_basicsize_includes_size_and_offsets(type)) { + goto finally; + } + if (type->tp_flags & Py_TPFLAGS_MANAGED_DICT) { res->ht_cached_keys = _PyDict_NewKeysForClass(); } @@ -3636,13 +3731,11 @@ PyType_FromMetaclass(PyTypeObject *metaclass, PyObject *module, } if (weaklistoffset) { - type->tp_weaklistoffset = weaklistoffset; if (PyDict_DelItem((PyObject *)type->tp_dict, &_Py_ID(__weaklistoffset__)) < 0) { goto finally; } } if (dictoffset) { - type->tp_dictoffset = dictoffset; if (PyDict_DelItem((PyObject *)type->tp_dict, &_Py_ID(__dictoffset__)) < 0) { goto finally; } @@ -3656,12 +3749,13 @@ PyType_FromMetaclass(PyTypeObject *metaclass, PyObject *module, if (r == 0) { s = strrchr(spec->name, '.'); if (s != NULL) { - modname = PyUnicode_FromStringAndSize( + PyObject *modname = PyUnicode_FromStringAndSize( spec->name, (Py_ssize_t)(s - spec->name)); if (modname == NULL) { goto finally; } r = PyDict_SetItem(type->tp_dict, &_Py_ID(__module__), modname); + Py_DECREF(modname); if (r != 0) { goto finally; } @@ -3681,7 +3775,9 @@ PyType_FromMetaclass(PyTypeObject *metaclass, PyObject *module, Py_CLEAR(res); } Py_XDECREF(bases); - Py_XDECREF(modname); + PyObject_Free(tp_doc); + Py_XDECREF(ht_name); + PyMem_Free(_ht_tpname); return (PyObject*)res; }