Skip to content

Commit

Permalink
Add PyString::intern to enable access to Python's built-in string int…
Browse files Browse the repository at this point in the history
…erning.
  • Loading branch information
adamreichold committed Apr 3, 2022
1 parent bfc090c commit b04a157
Show file tree
Hide file tree
Showing 2 changed files with 40 additions and 1 deletion.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- Allow dependent crates to access config values from `pyo3-build-config` via cargo link dep env vars. [#2092](https://github.com/PyO3/pyo3/pull/2092)
- Added methods on `InterpreterConfig` to run Python scripts using the configured executable. [#2092](https://github.com/PyO3/pyo3/pull/2092)
- Added FFI definitions for `PyType_FromModuleAndSpec`, `PyType_GetModule`, `PyType_GetModuleState` and `PyModule_AddType`. [#2250](https://github.com/PyO3/pyo3/pull/2250)
- Add `PyString::intern` to enable usage of Python's built-in string interning. [#2268](https://github.com/PyO3/pyo3/pull/2268)

### Changed

Expand Down
40 changes: 39 additions & 1 deletion src/types/string.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ use crate::{
ToPyObject,
};
use std::borrow::Cow;
use std::ffi::CStr;
use std::os::raw::c_char;
use std::str;

Expand Down Expand Up @@ -69,7 +70,6 @@ impl<'a> PyStringData<'a> {
/// C APIs that skip input validation (like `PyUnicode_FromKindAndData`) and should
/// never occur for strings that were created from Python code.
pub fn to_string(self, py: Python<'_>) -> PyResult<Cow<'a, str>> {
use std::ffi::CStr;
match self {
Self::Ucs1(data) => match str::from_utf8(data) {
Ok(s) => Ok(Cow::Borrowed(s)),
Expand Down Expand Up @@ -144,6 +144,29 @@ impl PyString {
unsafe { py.from_owned_ptr(ffi::PyUnicode_FromStringAndSize(ptr, len)) }
}

/// Interns the given null-terminated string using Python's built-in string interning.
///
/// Repeated calls with the same string return a reference to the same Python string object.
///
/// # Panics
///
/// Panics if out of memory.
///
/// # Examples
///
/// ```
/// use std::ffi::CStr;
/// # use pyo3::{types::PyString, Python};
///
/// # Python::with_gil(|py| {
/// let c_str = CStr::from_bytes_with_nul(b"foobar\0").unwrap();
/// let py_str = PyString::intern(py, c_str);
/// assert_eq!(c_str.to_str().unwrap(), py_str.to_str().unwrap());
/// # });
/// ```
pub fn intern<'p>(py: Python<'p>, s: &CStr) -> &'p PyString {
    // `CStr` guarantees a null-terminated buffer, which is what the C API expects.
    let raw: *const c_char = s.as_ptr();
    // SAFETY: `raw` points to a null-terminated string that stays borrowed for the whole
    // call, and the pointer returned by the C API is handed to `py` as an owned reference.
    unsafe {
        let interned = ffi::PyUnicode_InternFromString(raw);
        py.from_owned_ptr(interned)
    }
}

/// Attempts to create a Python string from a Python [bytes-like object].
///
/// [bytes-like object]: https://docs.python.org/3/glossary.html#term-bytes-like-object
Expand Down Expand Up @@ -592,4 +615,19 @@ mod tests {
assert_eq!(data.to_string_lossy(), Cow::Owned::<str>("𠀀�".into()));
});
}

#[test]
fn test_intern_string() {
    Python::with_gil(|py| {
        let c_str = CStr::from_bytes_with_nul(b"foobar\0").unwrap();
        let expected = c_str.to_str().unwrap();

        // Each interned string must carry the same contents as the input.
        let first = PyString::intern(py, c_str);
        assert_eq!(expected, first.to_str().unwrap());

        let second = PyString::intern(py, c_str);
        assert_eq!(expected, second.to_str().unwrap());

        // Interning the same contents twice must yield the very same Python object.
        assert_eq!(first.as_ptr(), second.as_ptr());
    });
}
}

0 comments on commit b04a157

Please sign in to comment.