From 0ed38a64118f74f0e36ff9fbe9a98024a68971fb Mon Sep 17 00:00:00 2001 From: Chet Nieter Date: Fri, 10 Feb 2023 16:56:11 -0500 Subject: [PATCH] Patching pybind11 to fix premature deletion of python objects Patching pybind11 to fix premature deletion of python objects created and wrapped in c++. This patch is based off an open PR for pybind11. https://github.com/pybind/pybind11/pull/2839 --- CMake/External_pybind11.cmake | 11 + Patches/pybind11/Patch.cmake | 31 + Patches/pybind11/cast.h | 1712 +++++++++++ Patches/pybind11/detail/class.h | 737 +++++ Patches/pybind11/detail/common.h | 1240 ++++++++ Patches/pybind11/include/pybind11/pybind11.h | 1838 ----------- Patches/pybind11/pybind11.h | 2883 ++++++++++++++++++ 7 files changed, 6614 insertions(+), 1838 deletions(-) create mode 100644 Patches/pybind11/Patch.cmake create mode 100644 Patches/pybind11/cast.h create mode 100644 Patches/pybind11/detail/class.h create mode 100644 Patches/pybind11/detail/common.h delete mode 100644 Patches/pybind11/include/pybind11/pybind11.h create mode 100644 Patches/pybind11/pybind11.h diff --git a/CMake/External_pybind11.cmake b/CMake/External_pybind11.cmake index b1d3fccc..62a52f21 100644 --- a/CMake/External_pybind11.cmake +++ b/CMake/External_pybind11.cmake @@ -6,12 +6,23 @@ if (PYTHON_EXECUTABLE) set(pybind_PYTHON_ARGS -DPYTHON_EXECUTABLE:FILEPATH=${PYTHON_EXECUTABLE}) endif() +# If a patch file exists, apply it +set (pybind11_patch ${fletch_SOURCE_DIR}/Patches/pybind11) +if (EXISTS ${pybind11_patch}) + set(pybind11_PATCH_COMMAND ${CMAKE_COMMAND} + -Dpybind11_patch:PATH=${pybind11_patch} + -Dpybind11_source:PATH=${fletch_BUILD_PREFIX}/src/pybind11 + -P ${pybind11_patch}/Patch.cmake + ) +endif() + ExternalProject_Add(pybind11 URL ${pybind11_url} URL_MD5 ${pybind11_md5} DOWNLOAD_NAME ${pybind11_dlname} ${COMMON_EP_ARGS} ${COMMON_CMAKE_EP_ARGS} + PATCH_COMMAND ${pybind11_PATCH_COMMAND} CMAKE_ARGS ${COMMON_CMAKE_ARGS} -DCMAKE_CXX_STANDARD=17 diff --git a/Patches/pybind11/Patch.cmake b/Patches/pybind11/Patch.cmake new file mode 100644 index 00000000..4d48a97e --- /dev/null +++ b/Patches/pybind11/Patch.cmake @@ -0,0 +1,31 @@ +#+ +# This file is called as CMake -P script for the patch step of +# External_pybind11.cmake pybind11_patch and pybind11_source are defined on the command +# line along with the call. +#- + +# Patching files based off of open PR for pybind11 +# https://github.com/pybind/pybind11/pull/2839 +configure_file( + ${pybind11_patch}/cast.h + ${pybind11_source}/include/pybind11/ + COPYONLY +) + +configure_file( + ${pybind11_patch}/pybind11.h + ${pybind11_source}/include/pybind11/ + COPYONLY +) + +configure_file( + ${pybind11_patch}/detail/class.h + ${pybind11_source}/include/pybind11/detail + COPYONLY +) + +configure_file( + ${pybind11_patch}/detail/common.h + ${pybind11_source}/include/pybind11/detail + COPYONLY +) diff --git a/Patches/pybind11/cast.h b/Patches/pybind11/cast.h new file mode 100644 index 00000000..f9ab96a5 --- /dev/null +++ b/Patches/pybind11/cast.h @@ -0,0 +1,1712 @@ +/* + pybind11/cast.h: Partial template specializations to cast between + C++ and Python types + + Copyright (c) 2016 Wenzel Jakob + + All rights reserved. Use of this source code is governed by a + BSD-style license that can be found in the LICENSE file. +*/ + +#pragma once + +#include "detail/common.h" +#include "detail/descr.h" +#include "detail/type_caster_base.h" +#include "detail/typeid.h" +#include "gil.h" +#include "pytypes.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +PYBIND11_NAMESPACE_BEGIN(PYBIND11_NAMESPACE) + +PYBIND11_WARNING_DISABLE_MSVC(4127) + +PYBIND11_NAMESPACE_BEGIN(detail) + +template +class type_caster : public type_caster_base {}; +template +using make_caster = type_caster>; + +// Shortcut for calling a caster's `cast_op_type` cast operator for casting a type_caster to a T +template +typename make_caster::template cast_op_type cast_op(make_caster &caster) { + return caster.operator typename make_caster::template cast_op_type(); +} +template +typename make_caster::template cast_op_type::type> +cast_op(make_caster &&caster) { + return std::move(caster).operator typename make_caster:: + template cast_op_type::type>(); +} + +template +class type_caster> { +private: + using caster_t = make_caster; + caster_t subcaster; + using reference_t = type &; + using subcaster_cast_op_type = typename caster_t::template cast_op_type; + + static_assert( + std::is_same::type &, subcaster_cast_op_type>::value + || std::is_same::value, + "std::reference_wrapper caster requires T to have a caster with an " + "`operator T &()` or `operator const T &()`"); + +public: + bool load(handle src, bool convert) { return subcaster.load(src, convert); } + static constexpr auto name = caster_t::name; + static handle + cast(const std::reference_wrapper &src, return_value_policy policy, handle parent) { + // It is definitely wrong to take ownership of this pointer, so mask that rvp + if (policy == return_value_policy::take_ownership + || policy == return_value_policy::automatic) { + policy = return_value_policy::automatic_reference; + } + return caster_t::cast(&src.get(), policy, parent); + } + template + using cast_op_type = std::reference_wrapper; + explicit operator std::reference_wrapper() { return cast_op(subcaster); } +}; + +#define PYBIND11_TYPE_CASTER(type, py_name) \ +protected: \ + type value; \ + \ +public: \ + static constexpr auto name = py_name; \ + template >::value, \ + int> \ + = 0> \ + static ::pybind11::handle cast( \ + T_ *src, ::pybind11::return_value_policy policy, ::pybind11::handle parent) { \ + if (!src) \ + return ::pybind11::none().release(); \ + if (policy == ::pybind11::return_value_policy::take_ownership) { \ + auto h = cast(std::move(*src), policy, parent); \ + delete src; \ + return h; \ + } \ + return cast(*src, policy, parent); \ + } \ + operator type *() { return &value; } /* NOLINT(bugprone-macro-parentheses) */ \ + operator type &() { return value; } /* NOLINT(bugprone-macro-parentheses) */ \ + operator type &&() && { return std::move(value); } /* NOLINT(bugprone-macro-parentheses) */ \ + template \ + using cast_op_type = ::pybind11::detail::movable_cast_op_type + +template +using is_std_char_type = any_of, /* std::string */ +#if defined(PYBIND11_HAS_U8STRING) + std::is_same, /* std::u8string */ +#endif + std::is_same, /* std::u16string */ + std::is_same, /* std::u32string */ + std::is_same /* std::wstring */ + >; + +template +struct type_caster::value && !is_std_char_type::value>> { + using _py_type_0 = conditional_t; + using _py_type_1 = conditional_t::value, + _py_type_0, + typename std::make_unsigned<_py_type_0>::type>; + using py_type = conditional_t::value, double, _py_type_1>; + +public: + bool load(handle src, bool convert) { + py_type py_value; + + if (!src) { + return false; + } + +#if !defined(PYPY_VERSION) + auto index_check = [](PyObject *o) { return PyIndex_Check(o); }; +#else + // In PyPy 7.3.3, `PyIndex_Check` is implemented by calling `__index__`, + // while CPython only considers the existence of `nb_index`/`__index__`. + auto index_check = [](PyObject *o) { return hasattr(o, "__index__"); }; +#endif + + if (std::is_floating_point::value) { + if (convert || PyFloat_Check(src.ptr())) { + py_value = (py_type) PyFloat_AsDouble(src.ptr()); + } else { + return false; + } + } else if (PyFloat_Check(src.ptr()) + || (!convert && !PYBIND11_LONG_CHECK(src.ptr()) && !index_check(src.ptr()))) { + return false; + } else { + handle src_or_index = src; + // PyPy: 7.3.7's 3.8 does not implement PyLong_*'s __index__ calls. +#if PY_VERSION_HEX < 0x03080000 || defined(PYPY_VERSION) + object index; + if (!PYBIND11_LONG_CHECK(src.ptr())) { // So: index_check(src.ptr()) + index = reinterpret_steal(PyNumber_Index(src.ptr())); + if (!index) { + PyErr_Clear(); + if (!convert) + return false; + } else { + src_or_index = index; + } + } +#endif + if (std::is_unsigned::value) { + py_value = as_unsigned(src_or_index.ptr()); + } else { // signed integer: + py_value = sizeof(T) <= sizeof(long) + ? (py_type) PyLong_AsLong(src_or_index.ptr()) + : (py_type) PYBIND11_LONG_AS_LONGLONG(src_or_index.ptr()); + } + } + + // Python API reported an error + bool py_err = py_value == (py_type) -1 && PyErr_Occurred(); + + // Check to see if the conversion is valid (integers should match exactly) + // Signed/unsigned checks happen elsewhere + if (py_err + || (std::is_integral::value && sizeof(py_type) != sizeof(T) + && py_value != (py_type) (T) py_value)) { + PyErr_Clear(); + if (py_err && convert && (PyNumber_Check(src.ptr()) != 0)) { + auto tmp = reinterpret_steal(std::is_floating_point::value + ? PyNumber_Float(src.ptr()) + : PyNumber_Long(src.ptr())); + PyErr_Clear(); + return load(tmp, false); + } + return false; + } + + value = (T) py_value; + return true; + } + + template + static typename std::enable_if::value, handle>::type + cast(U src, return_value_policy /* policy */, handle /* parent */) { + return PyFloat_FromDouble((double) src); + } + + template + static typename std::enable_if::value && std::is_signed::value + && (sizeof(U) <= sizeof(long)), + handle>::type + cast(U src, return_value_policy /* policy */, handle /* parent */) { + return PYBIND11_LONG_FROM_SIGNED((long) src); + } + + template + static typename std::enable_if::value && std::is_unsigned::value + && (sizeof(U) <= sizeof(unsigned long)), + handle>::type + cast(U src, return_value_policy /* policy */, handle /* parent */) { + return PYBIND11_LONG_FROM_UNSIGNED((unsigned long) src); + } + + template + static typename std::enable_if::value && std::is_signed::value + && (sizeof(U) > sizeof(long)), + handle>::type + cast(U src, return_value_policy /* policy */, handle /* parent */) { + return PyLong_FromLongLong((long long) src); + } + + template + static typename std::enable_if::value && std::is_unsigned::value + && (sizeof(U) > sizeof(unsigned long)), + handle>::type + cast(U src, return_value_policy /* policy */, handle /* parent */) { + return PyLong_FromUnsignedLongLong((unsigned long long) src); + } + + PYBIND11_TYPE_CASTER(T, const_name::value>("int", "float")); +}; + +template +struct void_caster { +public: + bool load(handle src, bool) { + if (src && src.is_none()) { + return true; + } + return false; + } + static handle cast(T, return_value_policy /* policy */, handle /* parent */) { + return none().release(); + } + PYBIND11_TYPE_CASTER(T, const_name("None")); +}; + +template <> +class type_caster : public void_caster {}; + +template <> +class type_caster : public type_caster { +public: + using type_caster::cast; + + bool load(handle h, bool) { + if (!h) { + return false; + } + if (h.is_none()) { + value = nullptr; + return true; + } + + /* Check if this is a capsule */ + if (isinstance(h)) { + value = reinterpret_borrow(h); + return true; + } + + /* Check if this is a C++ type */ + const auto &bases = all_type_info((PyTypeObject *) type::handle_of(h).ptr()); + if (bases.size() == 1) { // Only allowing loading from a single-value type + value = values_and_holders(reinterpret_cast(h.ptr())).begin()->value_ptr(); + return true; + } + + /* Fail */ + return false; + } + + static handle cast(const void *ptr, return_value_policy /* policy */, handle /* parent */) { + if (ptr) { + return capsule(ptr).release(); + } + return none().release(); + } + + template + using cast_op_type = void *&; + explicit operator void *&() { return value; } + static constexpr auto name = const_name("capsule"); + +private: + void *value = nullptr; +}; + +template <> +class type_caster : public void_caster {}; + +template <> +class type_caster { +public: + bool load(handle src, bool convert) { + if (!src) { + return false; + } + if (src.ptr() == Py_True) { + value = true; + return true; + } + if (src.ptr() == Py_False) { + value = false; + return true; + } + if (convert || (std::strcmp("numpy.bool_", Py_TYPE(src.ptr())->tp_name) == 0)) { + // (allow non-implicit conversion for numpy booleans) + + Py_ssize_t res = -1; + if (src.is_none()) { + res = 0; // None is implicitly converted to False + } +#if defined(PYPY_VERSION) + // On PyPy, check that "__bool__" attr exists + else if (hasattr(src, PYBIND11_BOOL_ATTR)) { + res = PyObject_IsTrue(src.ptr()); + } +#else + // Alternate approach for CPython: this does the same as the above, but optimized + // using the CPython API so as to avoid an unneeded attribute lookup. + else if (auto *tp_as_number = src.ptr()->ob_type->tp_as_number) { + if (PYBIND11_NB_BOOL(tp_as_number)) { + res = (*PYBIND11_NB_BOOL(tp_as_number))(src.ptr()); + } + } +#endif + if (res == 0 || res == 1) { + value = (res != 0); + return true; + } + PyErr_Clear(); + } + return false; + } + static handle cast(bool src, return_value_policy /* policy */, handle /* parent */) { + return handle(src ? Py_True : Py_False).inc_ref(); + } + PYBIND11_TYPE_CASTER(bool, const_name("bool")); +}; + +// Helper class for UTF-{8,16,32} C++ stl strings: +template +struct string_caster { + using CharT = typename StringType::value_type; + + // Simplify life by being able to assume standard char sizes (the standard only guarantees + // minimums, but Python requires exact sizes) + static_assert(!std::is_same::value || sizeof(CharT) == 1, + "Unsupported char size != 1"); +#if defined(PYBIND11_HAS_U8STRING) + static_assert(!std::is_same::value || sizeof(CharT) == 1, + "Unsupported char8_t size != 1"); +#endif + static_assert(!std::is_same::value || sizeof(CharT) == 2, + "Unsupported char16_t size != 2"); + static_assert(!std::is_same::value || sizeof(CharT) == 4, + "Unsupported char32_t size != 4"); + // wchar_t can be either 16 bits (Windows) or 32 (everywhere else) + static_assert(!std::is_same::value || sizeof(CharT) == 2 || sizeof(CharT) == 4, + "Unsupported wchar_t size != 2/4"); + static constexpr size_t UTF_N = 8 * sizeof(CharT); + + bool load(handle src, bool) { + handle load_src = src; + if (!src) { + return false; + } + if (!PyUnicode_Check(load_src.ptr())) { + return load_raw(load_src); + } + + // For UTF-8 we avoid the need for a temporary `bytes` object by using + // `PyUnicode_AsUTF8AndSize`. + if (UTF_N == 8) { + Py_ssize_t size = -1; + const auto *buffer + = reinterpret_cast(PyUnicode_AsUTF8AndSize(load_src.ptr(), &size)); + if (!buffer) { + PyErr_Clear(); + return false; + } + value = StringType(buffer, static_cast(size)); + return true; + } + + auto utfNbytes + = reinterpret_steal(PyUnicode_AsEncodedString(load_src.ptr(), + UTF_N == 8 ? "utf-8" + : UTF_N == 16 ? "utf-16" + : "utf-32", + nullptr)); + if (!utfNbytes) { + PyErr_Clear(); + return false; + } + + const auto *buffer + = reinterpret_cast(PYBIND11_BYTES_AS_STRING(utfNbytes.ptr())); + size_t length = (size_t) PYBIND11_BYTES_SIZE(utfNbytes.ptr()) / sizeof(CharT); + // Skip BOM for UTF-16/32 + if (UTF_N > 8) { + buffer++; + length--; + } + value = StringType(buffer, length); + + // If we're loading a string_view we need to keep the encoded Python object alive: + if (IsView) { + loader_life_support::add_patient(utfNbytes); + } + + return true; + } + + static handle + cast(const StringType &src, return_value_policy /* policy */, handle /* parent */) { + const char *buffer = reinterpret_cast(src.data()); + auto nbytes = ssize_t(src.size() * sizeof(CharT)); + handle s = decode_utfN(buffer, nbytes); + if (!s) { + throw error_already_set(); + } + return s; + } + + PYBIND11_TYPE_CASTER(StringType, const_name(PYBIND11_STRING_NAME)); + +private: + static handle decode_utfN(const char *buffer, ssize_t nbytes) { +#if !defined(PYPY_VERSION) + return UTF_N == 8 ? PyUnicode_DecodeUTF8(buffer, nbytes, nullptr) + : UTF_N == 16 ? PyUnicode_DecodeUTF16(buffer, nbytes, nullptr, nullptr) + : PyUnicode_DecodeUTF32(buffer, nbytes, nullptr, nullptr); +#else + // PyPy segfaults when on PyUnicode_DecodeUTF16 (and possibly on PyUnicode_DecodeUTF32 as + // well), so bypass the whole thing by just passing the encoding as a string value, which + // works properly: + return PyUnicode_Decode(buffer, + nbytes, + UTF_N == 8 ? "utf-8" + : UTF_N == 16 ? "utf-16" + : "utf-32", + nullptr); +#endif + } + + // When loading into a std::string or char*, accept a bytes/bytearray object as-is (i.e. + // without any encoding/decoding attempt). For other C++ char sizes this is a no-op. + // which supports loading a unicode from a str, doesn't take this path. + template + bool load_raw(enable_if_t::value, handle> src) { + if (PYBIND11_BYTES_CHECK(src.ptr())) { + // We were passed raw bytes; accept it into a std::string or char* + // without any encoding attempt. + const char *bytes = PYBIND11_BYTES_AS_STRING(src.ptr()); + if (!bytes) { + pybind11_fail("Unexpected PYBIND11_BYTES_AS_STRING() failure."); + } + value = StringType(bytes, (size_t) PYBIND11_BYTES_SIZE(src.ptr())); + return true; + } + if (PyByteArray_Check(src.ptr())) { + // We were passed a bytearray; accept it into a std::string or char* + // without any encoding attempt. + const char *bytearray = PyByteArray_AsString(src.ptr()); + if (!bytearray) { + pybind11_fail("Unexpected PyByteArray_AsString() failure."); + } + value = StringType(bytearray, (size_t) PyByteArray_Size(src.ptr())); + return true; + } + + return false; + } + + template + bool load_raw(enable_if_t::value, handle>) { + return false; + } +}; + +template +struct type_caster, + enable_if_t::value>> + : string_caster> {}; + +#ifdef PYBIND11_HAS_STRING_VIEW +template +struct type_caster, + enable_if_t::value>> + : string_caster, true> {}; +#endif + +// Type caster for C-style strings. We basically use a std::string type caster, but also add the +// ability to use None as a nullptr char* (which the string caster doesn't allow). +template +struct type_caster::value>> { + using StringType = std::basic_string; + using StringCaster = make_caster; + StringCaster str_caster; + bool none = false; + CharT one_char = 0; + +public: + bool load(handle src, bool convert) { + if (!src) { + return false; + } + if (src.is_none()) { + // Defer accepting None to other overloads (if we aren't in convert mode): + if (!convert) { + return false; + } + none = true; + return true; + } + return str_caster.load(src, convert); + } + + static handle cast(const CharT *src, return_value_policy policy, handle parent) { + if (src == nullptr) { + return pybind11::none().release(); + } + return StringCaster::cast(StringType(src), policy, parent); + } + + static handle cast(CharT src, return_value_policy policy, handle parent) { + if (std::is_same::value) { + handle s = PyUnicode_DecodeLatin1((const char *) &src, 1, nullptr); + if (!s) { + throw error_already_set(); + } + return s; + } + return StringCaster::cast(StringType(1, src), policy, parent); + } + + explicit operator CharT *() { + return none ? nullptr : const_cast(static_cast(str_caster).c_str()); + } + explicit operator CharT &() { + if (none) { + throw value_error("Cannot convert None to a character"); + } + + auto &value = static_cast(str_caster); + size_t str_len = value.size(); + if (str_len == 0) { + throw value_error("Cannot convert empty string to a character"); + } + + // If we're in UTF-8 mode, we have two possible failures: one for a unicode character that + // is too high, and one for multiple unicode characters (caught later), so we need to + // figure out how long the first encoded character is in bytes to distinguish between these + // two errors. We also allow want to allow unicode characters U+0080 through U+00FF, as + // those can fit into a single char value. + if (StringCaster::UTF_N == 8 && str_len > 1 && str_len <= 4) { + auto v0 = static_cast(value[0]); + // low bits only: 0-127 + // 0b110xxxxx - start of 2-byte sequence + // 0b1110xxxx - start of 3-byte sequence + // 0b11110xxx - start of 4-byte sequence + size_t char0_bytes = (v0 & 0x80) == 0 ? 1 + : (v0 & 0xE0) == 0xC0 ? 2 + : (v0 & 0xF0) == 0xE0 ? 3 + : 4; + + if (char0_bytes == str_len) { + // If we have a 128-255 value, we can decode it into a single char: + if (char0_bytes == 2 && (v0 & 0xFC) == 0xC0) { // 0x110000xx 0x10xxxxxx + one_char = static_cast(((v0 & 3) << 6) + + (static_cast(value[1]) & 0x3F)); + return one_char; + } + // Otherwise we have a single character, but it's > U+00FF + throw value_error("Character code point not in range(0x100)"); + } + } + + // UTF-16 is much easier: we can only have a surrogate pair for values above U+FFFF, thus a + // surrogate pair with total length 2 instantly indicates a range error (but not a "your + // string was too long" error). + else if (StringCaster::UTF_N == 16 && str_len == 2) { + one_char = static_cast(value[0]); + if (one_char >= 0xD800 && one_char < 0xE000) { + throw value_error("Character code point not in range(0x10000)"); + } + } + + if (str_len != 1) { + throw value_error("Expected a character, but multi-character string found"); + } + + one_char = value[0]; + return one_char; + } + + static constexpr auto name = const_name(PYBIND11_STRING_NAME); + template + using cast_op_type = pybind11::detail::cast_op_type<_T>; +}; + +// Base implementation for std::tuple and std::pair +template