diff --git a/src/ffi_ptr_ext.rs b/src/ffi_ptr_ext.rs index 34f95cd2bdc..0034ea853f1 100644 --- a/src/ffi_ptr_ext.rs +++ b/src/ffi_ptr_ext.rs @@ -1,4 +1,8 @@ -use crate::{ffi, instance::Py2, PyAny, PyResult, Python}; +use crate::{ + ffi, + instance::{Py2, Py2Borrowed}, + PyAny, PyResult, Python, +}; mod sealed { use super::*; @@ -13,6 +17,24 @@ use sealed::Sealed; pub(crate) trait FfiPtrExt: Sealed { unsafe fn assume_owned_or_err(self, py: Python<'_>) -> PyResult>; unsafe fn assume_owned(self, py: Python<'_>) -> Py2<'_, PyAny>; + + /// Assumes this pointer is borrowed from a parent object. + /// + /// Warning: the lifetime `'a` is not bounded by the function arguments; the caller is + /// responsible to ensure this is tied to some appropriate lifetime. + unsafe fn assume_borrowed_or_err<'a>( + self, + py: Python<'_>, + ) -> PyResult>; + + /// Same as `assume_borrowed_or_err`, but doesn't fetch an error on NULL. + unsafe fn assume_borrowed_or_opt<'a>( + self, + py: Python<'_>, + ) -> Option>; + + /// Same as `assume_borrowed_or_err`, but panics on NULL. + unsafe fn assume_borrowed<'a>(self, py: Python<'_>) -> Py2Borrowed<'a, '_, PyAny>; } impl FfiPtrExt for *mut ffi::PyObject { @@ -25,4 +47,25 @@ impl FfiPtrExt for *mut ffi::PyObject { unsafe fn assume_owned(self, py: Python<'_>) -> Py2<'_, PyAny> { Py2::from_owned_ptr(py, self) } + + #[inline] + unsafe fn assume_borrowed_or_err<'a>( + self, + py: Python<'_>, + ) -> PyResult> { + Py2Borrowed::from_ptr_or_err(py, self) + } + + #[inline] + unsafe fn assume_borrowed_or_opt<'a>( + self, + py: Python<'_>, + ) -> Option> { + Py2Borrowed::from_ptr_or_opt(py, self) + } + + #[inline] + unsafe fn assume_borrowed<'a>(self, py: Python<'_>) -> Py2Borrowed<'a, '_, PyAny> { + Py2Borrowed::from_ptr(py, self) + } } diff --git a/src/instance.rs b/src/instance.rs index 3ec2a3f807e..7568b28f108 100644 --- a/src/instance.rs +++ b/src/instance.rs @@ -217,6 +217,96 @@ unsafe impl AsPyPointer for Py2<'_, T> { } } +/// A borrowed equivalent to `Py2`. +/// +/// The advantage of this over `&Py2` is that it avoids the need to have a pointer-to-pointer, as Py2 +/// is already a pointer to an `ffi::PyObject``. +#[repr(transparent)] +pub(crate) struct Py2Borrowed<'a, 'py, T>( + NonNull, + PhantomData<&'a Py>, + Python<'py>, +); + +impl<'a, 'py> Py2Borrowed<'a, 'py, PyAny> { + /// # Safety + /// This is similar to `std::slice::from_raw_parts`, the lifetime `'a` is completely defined by + /// the caller and it's the caller's responsibility to ensure that the reference this is + /// derived from is valid for the lifetime `'a`. + pub(crate) unsafe fn from_ptr_or_err( + py: Python<'py>, + ptr: *mut ffi::PyObject, + ) -> PyResult { + NonNull::new(ptr).map_or_else( + || Err(PyErr::fetch(py)), + |ptr| Ok(Self(ptr, PhantomData, py)), + ) + } + + /// # Safety + /// This is similar to `std::slice::from_raw_parts`, the lifetime `'a` is completely defined by + /// the caller and it's the caller's responsibility to ensure that the reference this is + /// derived from is valid for the lifetime `'a`. + pub(crate) unsafe fn from_ptr_or_opt(py: Python<'py>, ptr: *mut ffi::PyObject) -> Option { + NonNull::new(ptr).map(|ptr| Self(ptr, PhantomData, py)) + } + + /// # Safety + /// This is similar to `std::slice::from_raw_parts`, the lifetime `'a` is completely defined by + /// the caller and it's the caller's responsibility to ensure that the reference this is + /// derived from is valid for the lifetime `'a`. + pub(crate) unsafe fn from_ptr(py: Python<'py>, ptr: *mut ffi::PyObject) -> Self { + Self( + NonNull::new(ptr).unwrap_or_else(|| crate::err::panic_after_error(py)), + PhantomData, + py, + ) + } +} + +impl<'a, 'py, T> From<&'a Py2<'py, T>> for Py2Borrowed<'a, 'py, T> { + /// Create borrow on a Py2 + fn from(instance: &'a Py2<'py, T>) -> Self { + Self( + unsafe { NonNull::new_unchecked(instance.as_ptr()) }, + PhantomData, + instance.py(), + ) + } +} + +impl<'py, T> Py2Borrowed<'py, 'py, T> +where + T: HasPyGilRef, +{ + pub(crate) fn from_gil_ref(gil_ref: &'py T::AsRefTarget) -> Self { + // Safety: &'py T::AsRefTarget is expected to be a Python pointer, + // so &'py T::AsRefTarget has the same layout as Self. + unsafe { std::mem::transmute(gil_ref) } + } + + // pub(crate) fn into_gil_ref(self) -> &'py T::AsRefTarget { + // // Safety: self is a borrow over `'py`. + // unsafe { self.py().from_borrowed_ptr(self.0.as_ptr()) } + // } +} + +impl std::fmt::Debug for Py2Borrowed<'_, '_, T> { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + Py2::fmt(self, f) + } +} + +impl<'py, T> Deref for Py2Borrowed<'_, 'py, T> { + type Target = Py2<'py, T>; + + #[inline] + fn deref(&self) -> &Py2<'py, T> { + // safety: Py2 has the same layout as NonNull + unsafe { &*(&self.0 as *const _ as *const Py2<'py, T>) } + } +} + /// A GIL-independent reference to an object allocated on the Python heap. /// /// This type does not auto-dereference to the inner object because you must prove you hold the GIL to access it. @@ -727,6 +817,10 @@ impl Py { Py2(py, ManuallyDrop::new(self)) } + pub(crate) fn attach_borrow<'a, 'py>(&'a self, py: Python<'py>) -> Py2Borrowed<'a, 'py, T> { + Py2Borrowed(self.0, PhantomData, py) + } + /// Returns whether `self` and `other` point to the same object. To compare /// the equality of two objects (the `==` operator), use [`eq`](PyAny::eq). /// diff --git a/src/prelude.rs b/src/prelude.rs index 28a6e835a3c..85409ffebbb 100644 --- a/src/prelude.rs +++ b/src/prelude.rs @@ -27,5 +27,7 @@ pub use crate::wrap_pyfunction; // pub(crate) use crate::instance::Py2; // Will be stabilized with a different name // pub(crate) use crate::types::any::PyAnyMethods; // pub(crate) use crate::types::boolobject::PyBoolMethods; +// pub(crate) use crate::types::bytearray::PyByteArrayMethods; +// pub(crate) use crate::types::bytes::PyBytesMethods; // pub(crate) use crate::types::float::PyFloatMethods; // pub(crate) use crate::types::sequence::PySequenceMethods; diff --git a/src/types/bytearray.rs b/src/types/bytearray.rs index e8873ccb727..e7f02179b63 100644 --- a/src/types/bytearray.rs +++ b/src/types/bytearray.rs @@ -1,4 +1,5 @@ use crate::err::{PyErr, PyResult}; +use crate::instance::{Py2, Py2Borrowed}; use crate::{ffi, AsPyPointer, Py, PyAny, Python}; use std::os::raw::c_char; use std::slice; @@ -74,13 +75,12 @@ impl PyByteArray { /// Gets the length of the bytearray. #[inline] pub fn len(&self) -> usize { - // non-negative Py_ssize_t should always fit into Rust usize - unsafe { ffi::PyByteArray_Size(self.as_ptr()) as usize } + Py2::borrowed_from_gil_ref(&self).len() } /// Checks if the bytearray is empty. pub fn is_empty(&self) -> bool { - self.len() == 0 + Py2::borrowed_from_gil_ref(&self).is_empty() } /// Gets the start of the buffer containing the contents of the bytearray. @@ -89,7 +89,7 @@ impl PyByteArray { /// /// See the safety requirements of [`PyByteArray::as_bytes`] and [`PyByteArray::as_bytes_mut`]. pub fn data(&self) -> *mut u8 { - unsafe { ffi::PyByteArray_AsString(self.as_ptr()).cast() } + Py2::borrowed_from_gil_ref(&self).data() } /// Extracts a slice of the `ByteArray`'s entire buffer. @@ -188,7 +188,7 @@ impl PyByteArray { /// } /// ``` pub unsafe fn as_bytes(&self) -> &[u8] { - slice::from_raw_parts(self.data(), self.len()) + Py2Borrowed::from_gil_ref(self).as_bytes() } /// Extracts a mutable slice of the `ByteArray`'s entire buffer. @@ -200,7 +200,7 @@ impl PyByteArray { /// apply to this function as well. #[allow(clippy::mut_from_ref)] pub unsafe fn as_bytes_mut(&self) -> &mut [u8] { - slice::from_raw_parts_mut(self.data(), self.len()) + Py2Borrowed::from_gil_ref(self).as_bytes_mut() } /// Copies the contents of the bytearray to a Rust vector. @@ -222,7 +222,7 @@ impl PyByteArray { /// # }); /// ``` pub fn to_vec(&self) -> Vec { - unsafe { self.as_bytes() }.to_vec() + Py2::borrowed_from_gil_ref(&self).to_vec() } /// Resizes the bytearray object to the new length `len`. @@ -230,6 +230,193 @@ impl PyByteArray { /// Note that this will invalidate any pointers obtained by [PyByteArray::data], as well as /// any (unsafe) slices obtained from [PyByteArray::as_bytes] and [PyByteArray::as_bytes_mut]. pub fn resize(&self, len: usize) -> PyResult<()> { + Py2::borrowed_from_gil_ref(&self).resize(len) + } +} + +/// Implementation of functionality for [`PyByteArray`]. +/// +/// These methods are defined for the `Py2<'py, PyByteArray>` smart pointer, so to use method call +/// syntax these methods are separated into a trait, because stable Rust does not yet support +/// `arbitrary_self_types`. +#[doc(alias = "PyByteArray")] +pub trait PyByteArrayMethods<'py> { + /// Gets the length of the bytearray. + fn len(&self) -> usize; + + /// Checks if the bytearray is empty. + fn is_empty(&self) -> bool; + + /// Gets the start of the buffer containing the contents of the bytearray. + /// + /// # Safety + /// + /// See the safety requirements of [`PyByteArray::as_bytes`] and [`PyByteArray::as_bytes_mut`]. + fn data(&self) -> *mut u8; + + /// Extracts a slice of the `ByteArray`'s entire buffer. + /// + /// # Safety + /// + /// Mutation of the `bytearray` invalidates the slice. If it is used afterwards, the behavior is + /// undefined. + /// + /// These mutations may occur in Python code as well as from Rust: + /// - Calling methods like [`PyByteArray::as_bytes_mut`] and [`PyByteArray::resize`] will + /// invalidate the slice. + /// - Actions like dropping objects or raising exceptions can invoke `__del__`methods or signal + /// handlers, which may execute arbitrary Python code. This means that if Python code has a + /// reference to the `bytearray` you cannot safely use the vast majority of PyO3's API whilst + /// using the slice. + /// + /// As a result, this slice should only be used for short-lived operations without executing any + /// Python code, such as copying into a Vec. + /// + /// # Examples + /// + /// ```rust + /// use pyo3::prelude::*; + /// use pyo3::exceptions::PyRuntimeError; + /// use pyo3::types::PyByteArray; + /// + /// #[pyfunction] + /// fn a_valid_function(bytes: &PyByteArray) -> PyResult<()> { + /// let section = { + /// // SAFETY: We promise to not let the interpreter regain control + /// // or invoke any PyO3 APIs while using the slice. + /// let slice = unsafe { bytes.as_bytes() }; + /// + /// // Copy only a section of `bytes` while avoiding + /// // `to_vec` which copies the entire thing. + /// let section = slice + /// .get(6..11) + /// .ok_or_else(|| PyRuntimeError::new_err("input is not long enough"))?; + /// Vec::from(section) + /// }; + /// + /// // Now we can do things with `section` and call PyO3 APIs again. + /// // ... + /// # assert_eq!(§ion, b"world"); + /// + /// Ok(()) + /// } + /// # fn main() -> PyResult<()> { + /// # Python::with_gil(|py| -> PyResult<()> { + /// # let fun = wrap_pyfunction!(a_valid_function, py)?; + /// # let locals = pyo3::types::PyDict::new(py); + /// # locals.set_item("a_valid_function", fun)?; + /// # + /// # py.run( + /// # r#"b = bytearray(b"hello world") + /// # a_valid_function(b) + /// # + /// # try: + /// # a_valid_function(bytearray()) + /// # except RuntimeError as e: + /// # assert str(e) == 'input is not long enough'"#, + /// # None, + /// # Some(locals), + /// # )?; + /// # + /// # Ok(()) + /// # }) + /// # } + /// ``` + /// + /// # Incorrect usage + /// + /// The following `bug` function is unsound ⚠️ + /// + /// ```rust,no_run + /// # use pyo3::prelude::*; + /// # use pyo3::types::PyByteArray; + /// + /// # #[allow(dead_code)] + /// #[pyfunction] + /// fn bug(py: Python<'_>, bytes: &PyByteArray) { + /// let slice = unsafe { bytes.as_bytes() }; + /// + /// // This explicitly yields control back to the Python interpreter... + /// // ...but it's not always this obvious. Many things do this implicitly. + /// py.allow_threads(|| { + /// // Python code could be mutating through its handle to `bytes`, + /// // which makes reading it a data race, which is undefined behavior. + /// println!("{:?}", slice[0]); + /// }); + /// + /// // Python code might have mutated it, so we can not rely on the slice + /// // remaining valid. As such this is also undefined behavior. + /// println!("{:?}", slice[0]); + /// } + /// ``` + unsafe fn as_bytes(&self) -> &[u8]; + + /// Extracts a mutable slice of the `ByteArray`'s entire buffer. + /// + /// # Safety + /// + /// Any other accesses of the `bytearray`'s buffer invalidate the slice. If it is used + /// afterwards, the behavior is undefined. The safety requirements of [`PyByteArray::as_bytes`] + /// apply to this function as well. + #[allow(clippy::mut_from_ref)] + unsafe fn as_bytes_mut(&self) -> &mut [u8]; + + /// Copies the contents of the bytearray to a Rust vector. + /// + /// # Examples + /// + /// ``` + /// # use pyo3::prelude::*; + /// # use pyo3::types::PyByteArray; + /// # Python::with_gil(|py| { + /// let bytearray = PyByteArray::new(py, b"Hello World."); + /// let mut copied_message = bytearray.to_vec(); + /// assert_eq!(b"Hello World.", copied_message.as_slice()); + /// + /// copied_message[11] = b'!'; + /// assert_eq!(b"Hello World!", copied_message.as_slice()); + /// + /// pyo3::py_run!(py, bytearray, "assert bytearray == b'Hello World.'"); + /// # }); + /// ``` + fn to_vec(&self) -> Vec; + + /// Resizes the bytearray object to the new length `len`. + /// + /// Note that this will invalidate any pointers obtained by [PyByteArray::data], as well as + /// any (unsafe) slices obtained from [PyByteArray::as_bytes] and [PyByteArray::as_bytes_mut]. + fn resize(&self, len: usize) -> PyResult<()>; +} + +impl<'py> PyByteArrayMethods<'py> for Py2<'py, PyByteArray> { + #[inline] + fn len(&self) -> usize { + // non-negative Py_ssize_t should always fit into Rust usize + unsafe { ffi::PyByteArray_Size(self.as_ptr()) as usize } + } + + fn is_empty(&self) -> bool { + self.len() == 0 + } + + fn data(&self) -> *mut u8 { + Py2Borrowed::from(self).data() + } + + unsafe fn as_bytes(&self) -> &[u8] { + Py2Borrowed::from(self).as_bytes() + } + + #[allow(clippy::mut_from_ref)] + unsafe fn as_bytes_mut(&self) -> &mut [u8] { + Py2Borrowed::from(self).as_bytes_mut() + } + + fn to_vec(&self) -> Vec { + unsafe { self.as_bytes() }.to_vec() + } + + fn resize(&self, len: usize) -> PyResult<()> { unsafe { let result = ffi::PyByteArray_Resize(self.as_ptr(), len as ffi::Py_ssize_t); if result == 0 { @@ -241,6 +428,22 @@ impl PyByteArray { } } +impl<'a> Py2Borrowed<'a, '_, PyByteArray> { + fn data(&self) -> *mut u8 { + unsafe { ffi::PyByteArray_AsString(self.as_ptr()).cast() } + } + + #[allow(clippy::wrong_self_convention)] + unsafe fn as_bytes(self) -> &'a [u8] { + slice::from_raw_parts(self.data(), self.len()) + } + + #[allow(clippy::wrong_self_convention)] + unsafe fn as_bytes_mut(self) -> &'a mut [u8] { + slice::from_raw_parts_mut(self.data(), self.len()) + } +} + impl<'py> TryFrom<&'py PyAny> for &'py PyByteArray { type Error = crate::PyErr; diff --git a/src/types/bytes.rs b/src/types/bytes.rs index 1df82d2bf08..5b550dd34be 100644 --- a/src/types/bytes.rs +++ b/src/types/bytes.rs @@ -1,3 +1,4 @@ +use crate::instance::{Py2, Py2Borrowed}; use crate::{ffi, FromPyObject, IntoPy, Py, PyAny, PyResult, Python, ToPyObject}; use std::borrow::Cow; use std::ops::Index; @@ -88,6 +89,32 @@ impl PyBytes { /// Gets the Python string as a byte slice. #[inline] pub fn as_bytes(&self) -> &[u8] { + Py2Borrowed::from_gil_ref(self).as_bytes() + } +} + +/// Implementation of functionality for [`PyBytes`]. +/// +/// These methods are defined for the `Py2<'py, PyBytes>` smart pointer, so to use method call +/// syntax these methods are separated into a trait, because stable Rust does not yet support +/// `arbitrary_self_types`. +#[doc(alias = "PyBytes")] +pub(crate) trait PyBytesMethods<'py> { + /// Gets the Python string as a byte slice. + fn as_bytes(&self) -> &[u8]; +} + +impl<'py> PyBytesMethods<'py> for Py2<'py, PyBytes> { + #[inline] + fn as_bytes(&self) -> &[u8] { + Py2Borrowed::from(self).as_bytes() + } +} + +impl<'a> Py2Borrowed<'a, '_, PyBytes> { + /// Gets the Python string as a byte slice. + #[allow(clippy::wrong_self_convention)] + fn as_bytes(self) -> &'a [u8] { unsafe { let buffer = ffi::PyBytes_AsString(self.as_ptr()) as *const u8; let length = ffi::PyBytes_Size(self.as_ptr()) as usize; @@ -101,17 +128,8 @@ impl Py { /// Gets the Python bytes as a byte slice. Because Python bytes are /// immutable, the result may be used for as long as the reference to /// `self` is held, including when the GIL is released. - pub fn as_bytes<'a>(&'a self, _py: Python<'_>) -> &'a [u8] { - // py is required here because `PyBytes_AsString` and `PyBytes_Size` - // can both technically raise exceptions which require the GIL to be - // held. The only circumstance in which they raise is if the value - // isn't really a `PyBytes`, but better safe than sorry. - unsafe { - let buffer = ffi::PyBytes_AsString(self.as_ptr()) as *const u8; - let length = ffi::PyBytes_Size(self.as_ptr()) as usize; - debug_assert!(!buffer.is_null()); - std::slice::from_raw_parts(buffer, length) - } + pub fn as_bytes<'a>(&'a self, py: Python<'_>) -> &'a [u8] { + self.attach_borrow(py).as_bytes() } } diff --git a/src/types/mod.rs b/src/types/mod.rs index 0174edc7be8..a7ea5b631f8 100644 --- a/src/types/mod.rs +++ b/src/types/mod.rs @@ -273,8 +273,8 @@ macro_rules! pyobject_native_type { pub(crate) mod any; pub(crate) mod boolobject; -mod bytearray; -mod bytes; +pub(crate) mod bytearray; +pub(crate) mod bytes; mod capsule; #[cfg(not(Py_LIMITED_API))] mod code;