Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

PEP 623: wstr, wstr_length, state changes #3087

Merged
merged 1 commit into from
Apr 11, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
72 changes: 60 additions & 12 deletions pyo3-ffi/src/cpython/unicodeobject.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
#[cfg(not(PyPy))]
use crate::Py_hash_t;
use crate::{PyObject, Py_UCS1, Py_UCS2, Py_UCS4, Py_UNICODE, Py_ssize_t};
#[cfg(not(Py_3_12))]
use libc::wchar_t;
use std::os::raw::{c_char, c_int, c_uint, c_void};

Expand Down Expand Up @@ -116,6 +117,28 @@ where
}
}

// Bit layout of the `state` bitfield, described least-significant field first.
// Every `*_INDEX` is the bit offset at which a field starts and every `*_WIDTH` is the
// field's size in bits. Each offset is derived from the previous field's offset and
// width, so the layout is only spelled out once.

const STATE_INTERNED_INDEX: usize = 0;
// `interned` is two bits wide on every supported CPython version. Released CPython 3.12
// declares `unsigned int interned:2` (only the trailing `ready` bit was dropped), so
// narrowing this field to one bit on 3.12 would shift `kind`, `compact` and `ascii`
// one bit too low.
const STATE_INTERNED_WIDTH: u8 = 2;

const STATE_KIND_INDEX: usize = STATE_INTERNED_INDEX + STATE_INTERNED_WIDTH as usize;
const STATE_KIND_WIDTH: u8 = 3;

const STATE_COMPACT_INDEX: usize = STATE_KIND_INDEX + STATE_KIND_WIDTH as usize;
const STATE_COMPACT_WIDTH: u8 = 1;

const STATE_ASCII_INDEX: usize = STATE_COMPACT_INDEX + STATE_COMPACT_WIDTH as usize;
const STATE_ASCII_WIDTH: u8 = 1;

// The `ready` bit only exists before Python 3.12.
#[cfg(not(Py_3_12))]
const STATE_READY_INDEX: usize = STATE_ASCII_INDEX + STATE_ASCII_WIDTH as usize;
#[cfg(not(Py_3_12))]
const STATE_READY_WIDTH: u8 = 1;

// generated by bindgen v0.63.0 (with small adaptations)
// The same code is generated for Python 3.7, 3.8, 3.9, 3.10, and 3.11, but the "ready" field
// has been removed from Python 3.12.
Expand All @@ -137,57 +160,67 @@ struct PyASCIIObjectState {
/// Accessors for the individual fields packed into the `state` bitfield.
///
/// Every getter/setter addresses its field through the shared `STATE_*_INDEX` /
/// `STATE_*_WIDTH` constants instead of hard-coded offsets, so the accessors follow
/// whatever layout those constants describe for the targeted Python version.
///
/// NOTE(review): `_bitfield.get` / `_bitfield.set` are presumably bindgen's bitfield-unit
/// helpers taking `(bit_offset, bit_width[, value])` — confirm against the struct definition.
impl PyASCIIObjectState {
    /// Reads the `interned` field.
    #[inline]
    unsafe fn interned(&self) -> c_uint {
        std::mem::transmute(
            self._bitfield
                .get(STATE_INTERNED_INDEX, STATE_INTERNED_WIDTH) as u32,
        )
    }

    /// Writes `val` into the `interned` field.
    #[inline]
    unsafe fn set_interned(&mut self, val: c_uint) {
        let val: u32 = std::mem::transmute(val);
        self._bitfield
            .set(STATE_INTERNED_INDEX, STATE_INTERNED_WIDTH, val as u64)
    }

    /// Reads the `kind` field.
    #[inline]
    unsafe fn kind(&self) -> c_uint {
        std::mem::transmute(self._bitfield.get(STATE_KIND_INDEX, STATE_KIND_WIDTH) as u32)
    }

    /// Writes `val` into the `kind` field.
    #[inline]
    unsafe fn set_kind(&mut self, val: c_uint) {
        let val: u32 = std::mem::transmute(val);
        self._bitfield
            .set(STATE_KIND_INDEX, STATE_KIND_WIDTH, val as u64)
    }

    /// Reads the `compact` flag.
    #[inline]
    unsafe fn compact(&self) -> c_uint {
        std::mem::transmute(self._bitfield.get(STATE_COMPACT_INDEX, STATE_COMPACT_WIDTH) as u32)
    }

    /// Writes `val` into the `compact` flag.
    #[inline]
    unsafe fn set_compact(&mut self, val: c_uint) {
        let val: u32 = std::mem::transmute(val);
        self._bitfield
            .set(STATE_COMPACT_INDEX, STATE_COMPACT_WIDTH, val as u64)
    }

    /// Reads the `ascii` flag.
    #[inline]
    unsafe fn ascii(&self) -> c_uint {
        std::mem::transmute(self._bitfield.get(STATE_ASCII_INDEX, STATE_ASCII_WIDTH) as u32)
    }

    /// Writes `val` into the `ascii` flag.
    #[inline]
    unsafe fn set_ascii(&mut self, val: c_uint) {
        let val: u32 = std::mem::transmute(val);
        self._bitfield
            .set(STATE_ASCII_INDEX, STATE_ASCII_WIDTH, val as u64)
    }

    /// Reads the `ready` flag (only present before Python 3.12).
    #[cfg(not(Py_3_12))]
    #[inline]
    unsafe fn ready(&self) -> c_uint {
        std::mem::transmute(self._bitfield.get(STATE_READY_INDEX, STATE_READY_WIDTH) as u32)
    }

    /// Writes `val` into the `ready` flag (only present before Python 3.12).
    #[cfg(not(Py_3_12))]
    #[inline]
    unsafe fn set_ready(&mut self, val: c_uint) {
        let val: u32 = std::mem::transmute(val);
        self._bitfield
            .set(STATE_READY_INDEX, STATE_READY_WIDTH, val as u64)
    }
}

Expand Down Expand Up @@ -226,14 +259,16 @@ pub struct PyASCIIObject {
/// unsigned int ready:1;
/// unsigned int :24;
pub state: u32,
#[cfg(not(Py_3_12))]
pub wstr: *mut wchar_t,
}

/// Interacting with the bitfield is not actually well-defined, so we mark these APIs unsafe.
impl PyASCIIObject {
/// Get the `interned` field of the [`PyASCIIObject`] state bitfield.
///
/// Returns one of: [`SSTATE_NOT_INTERNED`], [`SSTATE_INTERNED_MORTAL`], [`SSTATE_INTERNED_IMMORTAL`]
/// Returns one of: [`SSTATE_NOT_INTERNED`], [`SSTATE_INTERNED_MORTAL`],
/// or on CPython earlier than 3.12, [`SSTATE_INTERNED_IMMORTAL`]
#[inline]
pub unsafe fn interned(&self) -> c_uint {
PyASCIIObjectState::from(self.state).interned()
Expand All @@ -242,7 +277,8 @@ impl PyASCIIObject {
/// Set the `interned` field of the [`PyASCIIObject`] state bitfield.
///
/// Calling this function with an argument that is not [`SSTATE_NOT_INTERNED`],
/// [`SSTATE_INTERNED_MORTAL`], or [`SSTATE_INTERNED_IMMORTAL`] is invalid.
/// [`SSTATE_INTERNED_MORTAL`], or on CPython earlier than 3.12,
/// [`SSTATE_INTERNED_IMMORTAL`] is invalid.
#[inline]
pub unsafe fn set_interned(&mut self, val: c_uint) {
let mut state = PyASCIIObjectState::from(self.state);
Expand Down Expand Up @@ -309,6 +345,7 @@ impl PyASCIIObject {
/// Get the `ready` field of the [`PyASCIIObject`] state bitfield.
///
/// Returns either `0` or `1`.
#[cfg(not(Py_3_12))]
#[inline]
pub unsafe fn ready(&self) -> c_uint {
PyASCIIObjectState::from(self.state).ready()
Expand All @@ -317,6 +354,7 @@ impl PyASCIIObject {
/// Set the `ready` flag of the [`PyASCIIObject`] state bitfield.
///
/// Calling this function with an argument that is neither `0` nor `1` is invalid.
#[cfg(not(Py_3_12))]
#[inline]
pub unsafe fn set_ready(&mut self, val: c_uint) {
let mut state = PyASCIIObjectState::from(self.state);
Expand All @@ -330,6 +368,7 @@ pub struct PyCompactUnicodeObject {
pub _base: PyASCIIObject,
pub utf8_length: Py_ssize_t,
pub utf8: *mut c_char,
#[cfg(not(Py_3_12))]
pub wstr_length: Py_ssize_t,
}

Expand Down Expand Up @@ -359,6 +398,7 @@ extern "C" {

// Values accepted by / returned from the `interned` field of the `PyASCIIObject`
// state bitfield.

/// `interned` value `0`.
pub const SSTATE_NOT_INTERNED: c_uint = 0;
/// `interned` value `1`.
pub const SSTATE_INTERNED_MORTAL: c_uint = 1;
/// `interned` value `2`; only exposed when building against Python older than 3.12.
// NOTE(review): released CPython 3.12 headers appear to still define
// SSTATE_INTERNED_IMMORTAL (2) and to add SSTATE_INTERNED_IMMORTAL_STATIC (3) —
// confirm whether this gate should remain.
#[cfg(not(Py_3_12))]
pub const SSTATE_INTERNED_IMMORTAL: c_uint = 2;

#[inline]
Expand Down Expand Up @@ -449,6 +489,14 @@ pub unsafe fn PyUnicode_GET_LENGTH(op: *mut PyObject) -> Py_ssize_t {
(*(op as *mut PyASCIIObject)).length
}

/// Python 3.12 variant of `PyUnicode_IS_READY`.
///
/// The argument is not inspected: this variant unconditionally reports `1`.
/// The function is retained because CPython keeps it for backwards compatibility.
#[cfg(Py_3_12)]
#[inline]
pub unsafe fn PyUnicode_IS_READY(_op: *mut PyObject) -> c_uint {
    // Fixed answer on this branch; no per-object flag is read.
    const ALWAYS_READY: c_uint = 1;
    ALWAYS_READY
}

#[cfg(not(Py_3_12))]
#[inline]
pub unsafe fn PyUnicode_IS_READY(op: *mut PyObject) -> c_uint {
(*(op as *mut PyASCIIObject)).ready()
Expand Down
2 changes: 2 additions & 0 deletions pyo3-ffi/src/unicodeobject.rs
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,8 @@ extern "C" {
pub fn PyUnicode_FromFormat(format: *const c_char, ...) -> *mut PyObject;
#[cfg_attr(PyPy, link_name = "PyPyUnicode_InternInPlace")]
pub fn PyUnicode_InternInPlace(arg1: *mut *mut PyObject);
#[cfg(not(Py_3_12))]
#[cfg_attr(Py_3_10, deprecated(note = "Python 3.10"))]
pub fn PyUnicode_InternImmortal(arg1: *mut *mut PyObject);
#[cfg_attr(PyPy, link_name = "PyPyUnicode_InternFromString")]
pub fn PyUnicode_InternFromString(u: *const c_char) -> *mut PyObject;
Expand Down