diff --git a/Include/cpython/object.h b/Include/cpython/object.h
index 8bf05a32711835..1e294dc898290d 100644
--- a/Include/cpython/object.h
+++ b/Include/cpython/object.h
@@ -43,12 +43,10 @@ PyAPI_FUNC(Py_ssize_t) _Py_GetRefTotal(void);
    _PyObject_{Get,Set,Has}AttrId are __getattr__ versions using _Py_Identifier*.
 */
 typedef struct _Py_Identifier {
-    struct _Py_Identifier *next;
     const char* string;
-    PyObject *object;
 } _Py_Identifier;
 
-#define _Py_static_string_init(value) { .next = NULL, .string = value, .object = NULL }
+#define _Py_static_string_init(value) { .string = value}
 #define _Py_static_string(varname, value)  static _Py_Identifier varname = _Py_static_string_init(value)
 #define _Py_IDENTIFIER(varname) _Py_static_string(PyId_##varname, #varname)
 
diff --git a/Include/internal/pycore_pylifecycle.h b/Include/internal/pycore_pylifecycle.h
index 77ea3f27454da0..db96ff6e3515b2 100644
--- a/Include/internal/pycore_pylifecycle.h
+++ b/Include/internal/pycore_pylifecycle.h
@@ -31,6 +31,7 @@ PyAPI_FUNC(int) _Py_IsLocaleCoercionTarget(const char *ctype_loc);
 
 /* Various one-time initializers */
 
+extern PyStatus _PyUnicode_PreInit(PyThreadState *tstate);
 extern PyStatus _PyUnicode_Init(void);
 extern int _PyStructSequence_Init(void);
 extern int _PyLong_Init(PyThreadState *tstate);
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index 826298c23a924c..e2c8d0f9c40c34 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -43,6 +43,7 @@ OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 #include "pycore_abstract.h"      // _PyIndex_Check()
 #include "pycore_bytes_methods.h"
 #include "pycore_fileutils.h"
+#include "pycore_hashtable.h"     // _Py_hashtable_new()
 #include "pycore_initconfig.h"
 #include "pycore_interp.h"        // PyInterpreterState.fs_codec
 #include "pycore_object.h"
@@ -286,7 +287,7 @@ unicode_decode_utf8(const char *s, Py_ssize_t size,
                     Py_ssize_t *consumed);
 
 /* List of static strings. */
-static _Py_Identifier *static_strings = NULL;
+static _Py_hashtable_t *static_strings = NULL;
 
 /* bpo-40521: Latin1 singletons are shared by all interpreters. */
 #ifndef EXPERIMENTAL_ISOLATED_SUBINTERPRETERS
@@ -2275,31 +2276,48 @@ PyUnicode_FromString(const char *u)
 PyObject *
 _PyUnicode_FromId(_Py_Identifier *id)
 {
-    if (!id->object) {
-        id->object = PyUnicode_DecodeUTF8Stateful(id->string,
-                                                  strlen(id->string),
-                                                  NULL, NULL);
-        if (!id->object)
-            return NULL;
-        PyUnicode_InternInPlace(&id->object);
-        assert(!id->next);
-        id->next = static_strings;
-        static_strings = id;
+    PyObject *object = _Py_hashtable_get(static_strings, id);
+    if (object) {
+        // Return a borrowed reference
+        return object;
+    }
+
+    object = PyUnicode_DecodeUTF8Stateful(id->string, strlen(id->string),
+                                          NULL, NULL);
+    if (object == NULL) {
+        return NULL;
     }
-    return id->object;
+    PyUnicode_InternInPlace(&object);
+
+    // Store a strong reference
+    if (_Py_hashtable_set(static_strings, id, object) < 0) {
+        // Insertion failed, so the table holds no reference to the string:
+        // release ours here, otherwise it would be leaked.
+        Py_DECREF(object);
+        PyErr_NoMemory();
+        return NULL;
+    }
+
+    // Return a borrowed reference
+    return object;
+}
+
+/* Destructor registered for the values of the static_strings hash table:
+   release the strong reference stored by _PyUnicode_FromId(). */
+static void
+static_strings_decref(void *data)
+{
+    PyObject *object = (PyObject *)data;
+    Py_DECREF(object);
 }
 
 void
 _PyUnicode_ClearStaticStrings()
 {
-    _Py_Identifier *tmp, *s = static_strings;
-    while (s) {
-        Py_CLEAR(s->object);
-        tmp = s->next;
-        s->next = NULL;
-        s = tmp;
+    if (static_strings) {
+        _Py_hashtable_destroy(static_strings);
+        static_strings = NULL;
     }
-    static_strings = NULL;
 }
 
 /* Internal function, doesn't check maximum character */
@@ -15509,6 +15527,24 @@ PyTypeObject PyUnicode_Type = {
 
 /* Initialize the Unicode implementation */
 
+/* Create the static_strings hash table read by _PyUnicode_FromId().
+   The table is only created when tstate belongs to the main interpreter. */
+PyStatus
+_PyUnicode_PreInit(PyThreadState *tstate)
+{
+    if (_Py_IsMainInterpreter(tstate)) {
+        static_strings = _Py_hashtable_new_full(_Py_hashtable_hash_ptr,
+                                                _Py_hashtable_compare_direct,
+                                                NULL, static_strings_decref,
+                                                NULL);
+        if (static_strings == NULL) {
+            return _PyStatus_NO_MEMORY();
+        }
+    }
+    return _PyStatus_OK();
+}
+
+
 PyStatus
 _PyUnicode_Init(void)
 {
diff --git a/Python/pylifecycle.c b/Python/pylifecycle.c
index da66a82ada70a8..b4cec67c987b20 100644
--- a/Python/pylifecycle.c
+++ b/Python/pylifecycle.c
@@ -579,6 +579,11 @@ pycore_init_types(PyThreadState *tstate)
         return status;
     }
 
+    status = _PyUnicode_PreInit(tstate);
+    if (_PyStatus_EXCEPTION(status)) {
+        return status;
+    }
+
     if (is_main_interp) {
         status = _PyTypes_Init();
         if (_PyStatus_EXCEPTION(status)) {