Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[WIP] bpo-39465: _PyUnicode_FromId() now uses a hash table #20048

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 1 addition & 3 deletions Include/cpython/object.h
Original file line number Diff line number Diff line change
Expand Up @@ -43,12 +43,10 @@ PyAPI_FUNC(Py_ssize_t) _Py_GetRefTotal(void);
_PyObject_{Get,Set,Has}AttrId are __getattr__ versions using _Py_Identifier*.
*/
/* Static identifier: wraps a C string that _PyUnicode_FromId() turns into a
   Python str object on first use.
   NOTE(review): this hunk is reported as "1 addition & 3 deletions" and the
   initializer macro that follows only sets `string`; `next` and `object`
   look like they belong to the list-based implementation -- confirm which
   members remain. */
typedef struct _Py_Identifier {
/* Link to the next identifier in the list-based static_strings chain. */
struct _Py_Identifier *next;
/* NUL-terminated C string; _PyUnicode_FromId() decodes it as UTF-8. */
const char* string;
/* str object cached by the list-based _PyUnicode_FromId(). */
PyObject *object;
} _Py_Identifier;

/* Initializer for a _Py_Identifier.  Only `string` is named: with a
   designated initializer every member that is not mentioned is
   zero-initialized, so this is also correct if the struct carries extra
   pointer members. */
#define _Py_static_string_init(value) { .string = (value) }
/* Define a file-local identifier named `varname` for the C string `value`. */
#define _Py_static_string(varname, value) static _Py_Identifier varname = _Py_static_string_init(value)
/* Define PyId_<varname>, whose string is the spelling of `varname` itself. */
#define _Py_IDENTIFIER(varname) _Py_static_string(PyId_##varname, #varname)

Expand Down
1 change: 1 addition & 0 deletions Include/internal/pycore_pylifecycle.h
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ PyAPI_FUNC(int) _Py_IsLocaleCoercionTarget(const char *ctype_loc);

/* Various one-time initializers */

/* Creates the hash table that _PyUnicode_FromId() looks identifiers up in;
   only the main interpreter allocates it. */
extern PyStatus _PyUnicode_PreInit(PyThreadState *tstate);
extern PyStatus _PyUnicode_Init(void);
extern int _PyStructSequence_Init(void);
extern int _PyLong_Init(PyThreadState *tstate);
Expand Down
67 changes: 48 additions & 19 deletions Objects/unicodeobject.c
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
#include "pycore_abstract.h" // _PyIndex_Check()
#include "pycore_bytes_methods.h"
#include "pycore_fileutils.h"
#include "pycore_hashtable.h" // _Py_hashtable_new()
#include "pycore_initconfig.h"
#include "pycore_interp.h" // PyInterpreterState.fs_codec
#include "pycore_object.h"
Expand Down Expand Up @@ -286,7 +287,7 @@ unicode_decode_utf8(const char *s, Py_ssize_t size,
Py_ssize_t *consumed);

/* Hash table of static strings.  Keys are _Py_Identifier addresses (hashed
   and compared as raw pointers); each value is a strong reference to the
   interned str object created by _PyUnicode_FromId(). */
static _Py_hashtable_t *static_strings = NULL;

/* bpo-40521: Latin1 singletons are shared by all interpreters. */
#ifndef EXPERIMENTAL_ISOLATED_SUBINTERPRETERS
Expand Down Expand Up @@ -2275,31 +2276,43 @@ PyUnicode_FromString(const char *u)
/* Return the interned str object for a static identifier.
 *
 * The object is looked up in the static_strings hash table by the address
 * of `id`.  On a miss it is created by decoding id->string as UTF-8,
 * interned, and stored in the table.
 *
 * Returns a borrowed reference (the table owns the strong one), or NULL if
 * decoding fails or the entry cannot be stored; in the latter case
 * MemoryError is set.
 */
PyObject *
_PyUnicode_FromId(_Py_Identifier *id)
{
    PyObject *object = _Py_hashtable_get(static_strings, id);
    if (object != NULL) {
        // Already created: return the reference borrowed from the table.
        return object;
    }

    object = PyUnicode_DecodeUTF8Stateful(id->string, strlen(id->string),
                                          NULL, NULL);
    if (object == NULL) {
        return NULL;
    }
    PyUnicode_InternInPlace(&object);

    // On success the table takes over our strong reference.
    if (_Py_hashtable_set(static_strings, id, object) < 0) {
        // The table did not keep the object: drop our reference so the
        // freshly created string is not leaked.
        Py_DECREF(object);
        PyErr_NoMemory();
        return NULL;
    }

    // Return a borrowed reference
    return object;
}

static void
static_strings_decref(void *data)
{
PyObject *object = (PyObject *)data;
Py_DECREF(object);
}

/* Destroy the static_strings hash table, if one exists, and reset the
 * global pointer to NULL.
 *
 * The stored str objects are expected to be released by the destroy
 * callback the table was created with (static_strings_decref).
 * Calling this when no table exists is a no-op.
 */
void
_PyUnicode_ClearStaticStrings(void)
{
    if (static_strings != NULL) {
        _Py_hashtable_destroy(static_strings);
        static_strings = NULL;
    }
}

/* Internal function, doesn't check maximum character */
Expand Down Expand Up @@ -15509,6 +15522,22 @@ PyTypeObject PyUnicode_Type = {

/* Initialize the Unicode implementation */

/* Early initialization step for the Unicode implementation.
 *
 * For the main interpreter, allocates the static_strings hash table, which
 * hashes and compares keys as raw pointers and uses static_strings_decref
 * as a destroy callback.  For any other interpreter nothing is done.
 *
 * Returns a no-memory status if the table cannot be allocated, and an OK
 * status otherwise.
 */
PyStatus
_PyUnicode_PreInit(PyThreadState *tstate)
{
    if (!_Py_IsMainInterpreter(tstate)) {
        return _PyStatus_OK();
    }

    static_strings = _Py_hashtable_new_full(_Py_hashtable_hash_ptr,
                                            _Py_hashtable_compare_direct,
                                            NULL, static_strings_decref,
                                            NULL);
    if (static_strings == NULL) {
        return _PyStatus_NO_MEMORY();
    }
    return _PyStatus_OK();
}


PyStatus
_PyUnicode_Init(void)
{
Expand Down
5 changes: 5 additions & 0 deletions Python/pylifecycle.c
Original file line number Diff line number Diff line change
Expand Up @@ -579,6 +579,11 @@ pycore_init_types(PyThreadState *tstate)
return status;
}

status = _PyUnicode_PreInit(tstate);
if (_PyStatus_EXCEPTION(status)) {
return status;
}

if (is_main_interp) {
status = _PyTypes_Init();
if (_PyStatus_EXCEPTION(status)) {
Expand Down