From b04a157107255e62c123a2417baf9de53004b86c Mon Sep 17 00:00:00 2001 From: Adam Reichold Date: Sun, 3 Apr 2022 09:30:26 +0200 Subject: [PATCH] Add PyString::intern to enable access to Python's built-in string interning. --- CHANGELOG.md | 1 + src/types/string.rs | 40 +++++++++++++++++++++++++++++++++++++++- 2 files changed, 40 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index db19eba9431..6a95190576a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -14,6 +14,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Allow dependent crates to access config values from `pyo3-build-config` via cargo link dep env vars. [#2092](https://github.com/PyO3/pyo3/pull/2092) - Added methods on `InterpreterConfig` to run Python scripts using the configured executable. [#2092](https://github.com/PyO3/pyo3/pull/2092) - Added FFI definitions for `PyType_FromModuleAndSpec`, `PyType_GetModule`, `PyType_GetModuleState` and `PyModule_AddType`. [#2250](https://github.com/PyO3/pyo3/pull/2250) +- Add `PyString::intern` to enable usage of Python's built-in string interning. [#2268](https://github.com/PyO3/pyo3/pull/2268) ### Changed diff --git a/src/types/string.rs b/src/types/string.rs index 76610a108ae..1af4effe14e 100644 --- a/src/types/string.rs +++ b/src/types/string.rs @@ -8,6 +8,7 @@ use crate::{ ToPyObject, }; use std::borrow::Cow; +use std::ffi::CStr; use std::os::raw::c_char; use std::str; @@ -69,7 +70,6 @@ impl<'a> PyStringData<'a> { /// C APIs that skip input validation (like `PyUnicode_FromKindAndData`) and should /// never occur for strings that were created from Python code. 
pub fn to_string(self, py: Python<'_>) -> PyResult<Cow<'a, str>> { - use std::ffi::CStr; match self { Self::Ucs1(data) => match str::from_utf8(data) { Ok(s) => Ok(Cow::Borrowed(s)), @@ -144,6 +144,29 @@ impl PyString { unsafe { py.from_owned_ptr(ffi::PyUnicode_FromStringAndSize(ptr, len)) } } + /// Intern the given null-terminated string. + /// + /// This will return a reference to the same Python string object if called repeatedly with the same string. + /// + /// Panics if out of memory. + /// + /// # Example + /// + /// ``` + /// use std::ffi::CStr; + /// # use pyo3::{types::PyString, Python}; + /// + /// # Python::with_gil(|py| { + /// let c_str = CStr::from_bytes_with_nul(b"foobar\0").unwrap(); + /// let py_str = PyString::intern(py, c_str); + /// assert_eq!(c_str.to_str().unwrap(), py_str.to_str().unwrap()); + /// # }); + /// ``` + pub fn intern<'p>(py: Python<'p>, s: &CStr) -> &'p PyString { + let ptr = s.as_ptr() as *const c_char; + unsafe { py.from_owned_ptr(ffi::PyUnicode_InternFromString(ptr)) } + } + /// Attempts to create a Python string from a Python [bytes-like object]. /// /// [bytes-like object]: (https://docs.python.org/3/glossary.html#term-bytes-like-object). @@ -592,4 +615,19 @@ mod tests { assert_eq!(data.to_string_lossy(), Cow::Owned::<str>("𠀀�".into())); }); } + + #[test] + fn test_intern_string() { + Python::with_gil(|py| { + let s = CStr::from_bytes_with_nul(b"foobar\0").unwrap(); + + let py_string1 = PyString::intern(py, s); + assert_eq!(s.to_str().unwrap(), py_string1.to_str().unwrap()); + + let py_string2 = PyString::intern(py, s); + assert_eq!(s.to_str().unwrap(), py_string2.to_str().unwrap()); + + assert_eq!(py_string1.as_ptr(), py_string2.as_ptr()); + }); + } }