Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

bpo-39465: Fix _PyUnicode_FromId() for subinterpreters #20058

Merged
merged 2 commits into from
Dec 25, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 4 additions & 3 deletions Include/cpython/object.h
Original file line number Diff line number Diff line change
Expand Up @@ -35,12 +35,13 @@ PyAPI_FUNC(Py_ssize_t) _Py_GetRefTotal(void);
_PyObject_{Get,Set,Has}AttrId are __getattr__ versions using _Py_Identifier*.
*/
typedef struct _Py_Identifier {
struct _Py_Identifier *next;
const char* string;
PyObject *object;
// Index in PyInterpreterState.unicode.ids.array. It is process-wide
// unique and must be initialized to -1.
Py_ssize_t index;
} _Py_Identifier;

#define _Py_static_string_init(value) { .next = NULL, .string = value, .object = NULL }
#define _Py_static_string_init(value) { .string = value, .index = -1 }
#define _Py_static_string(varname, value) static _Py_Identifier varname = _Py_static_string_init(value)
#define _Py_IDENTIFIER(varname) _Py_static_string(PyId_##varname, #varname)

Expand Down
7 changes: 7 additions & 0 deletions Include/internal/pycore_interp.h
Original file line number Diff line number Diff line change
Expand Up @@ -64,13 +64,20 @@ struct _Py_bytes_state {
PyBytesObject *characters[256];
};

struct _Py_unicode_ids {
Py_ssize_t size;
PyObject **array;
};

struct _Py_unicode_state {
// The empty Unicode object is a singleton to improve performance.
PyObject *empty_string;
/* Single character Unicode strings in the Latin-1 range are being
shared as well. */
PyObject *latin1[256];
struct _Py_unicode_fs_codec fs_codec;
// Unicode identifiers (_Py_Identifier): see _PyUnicode_FromId()
struct _Py_unicode_ids ids;
};

struct _Py_float_state {
Expand Down
7 changes: 7 additions & 0 deletions Include/internal/pycore_runtime.h
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,11 @@ typedef struct _Py_AuditHookEntry {
void *userData;
} _Py_AuditHookEntry;

struct _Py_unicode_runtime_ids {
PyThread_type_lock lock;
Py_ssize_t next_index;
};

/* Full Python runtime state */

typedef struct pyruntimestate {
Expand Down Expand Up @@ -106,6 +111,8 @@ typedef struct pyruntimestate {
void *open_code_userdata;
_Py_AuditHookEntry *audit_hook_head;

struct _Py_unicode_runtime_ids unicode_ids;

// XXX Consolidate globals found via the check-c-globals script.
} _PyRuntimeState;

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
Make :c:func:`_PyUnicode_FromId` function compatible with subinterpreters.
Each interpreter now has an array of identifier objects (interned strings
decoded from UTF-8). Patch by Victor Stinner.
85 changes: 62 additions & 23 deletions Objects/unicodeobject.c
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
#define PY_SSIZE_T_CLEAN
#include "Python.h"
#include "pycore_abstract.h" // _PyIndex_Check()
#include "pycore_atomic_funcs.h" // _Py_atomic_size_get()
#include "pycore_bytes_methods.h" // _Py_bytes_lower()
#include "pycore_format.h" // F_LJUST
#include "pycore_initconfig.h" // _PyStatus_OK()
Expand Down Expand Up @@ -302,9 +303,6 @@ unicode_decode_utf8(const char *s, Py_ssize_t size,
_Py_error_handler error_handler, const char *errors,
Py_ssize_t *consumed);

/* List of static strings. */
static _Py_Identifier *static_strings = NULL;

/* Fast detection of the most frequent whitespace characters */
const unsigned char _Py_ascii_whitespace[] = {
0, 0, 0, 0, 0, 0, 0, 0,
Expand Down Expand Up @@ -2312,42 +2310,85 @@ PyUnicode_FromString(const char *u)
return PyUnicode_DecodeUTF8Stateful(u, (Py_ssize_t)size, NULL, NULL);
}


PyObject *
_PyUnicode_FromId(_Py_Identifier *id)
{
if (id->object) {
return id->object;
PyInterpreterState *interp = _PyInterpreterState_GET();
struct _Py_unicode_ids *ids = &interp->unicode.ids;

int index = _Py_atomic_size_get(&id->index);
if (index < 0) {
struct _Py_unicode_runtime_ids *rt_ids = &interp->runtime->unicode_ids;

PyThread_acquire_lock(rt_ids->lock, WAIT_LOCK);
// Check again to detect concurrent access. Another thread can have
// initialized the index while this thread waited for the lock.
index = _Py_atomic_size_get(&id->index);
if (index < 0) {
assert(rt_ids->next_index < PY_SSIZE_T_MAX);
index = rt_ids->next_index;
rt_ids->next_index++;
_Py_atomic_size_set(&id->index, index);
}
PyThread_release_lock(rt_ids->lock);
}
assert(index >= 0);

PyObject *obj;
obj = PyUnicode_DecodeUTF8Stateful(id->string,
strlen(id->string),
if (index < ids->size) {
obj = ids->array[index];
if (obj) {
// Return a borrowed reference
return obj;
}
}

obj = PyUnicode_DecodeUTF8Stateful(id->string, strlen(id->string),
NULL, NULL);
if (!obj) {
return NULL;
}
PyUnicode_InternInPlace(&obj);

assert(!id->next);
id->object = obj;
id->next = static_strings;
static_strings = id;
return id->object;
if (index >= ids->size) {
// Overallocate to reduce the number of realloc
Py_ssize_t new_size = Py_MAX(index * 2, 16);
Py_ssize_t item_size = sizeof(ids->array[0]);
PyObject **new_array = PyMem_Realloc(ids->array, new_size * item_size);
if (new_array == NULL) {
PyErr_NoMemory();
return NULL;
}
memset(&new_array[ids->size], 0, (new_size - ids->size) * item_size);
ids->array = new_array;
ids->size = new_size;
}

// The array stores a strong reference
ids->array[index] = obj;

// Return a borrowed reference
return obj;
}


static void
unicode_clear_static_strings(void)
unicode_clear_identifiers(PyThreadState *tstate)
{
_Py_Identifier *tmp, *s = static_strings;
while (s) {
Py_CLEAR(s->object);
tmp = s->next;
s->next = NULL;
s = tmp;
PyInterpreterState *interp = _PyInterpreterState_GET();
struct _Py_unicode_ids *ids = &interp->unicode.ids;
for (Py_ssize_t i=0; i < ids->size; i++) {
Py_XDECREF(ids->array[i]);
}
static_strings = NULL;
ids->size = 0;
PyMem_Free(ids->array);
ids->array = NULL;
// Don't reset _PyRuntime next_index: _Py_Identifier.id remains valid
// after Py_Finalize().
}


/* Internal function, doesn't check maximum character */

PyObject*
Expand Down Expand Up @@ -16238,9 +16279,7 @@ _PyUnicode_Fini(PyThreadState *tstate)
Py_CLEAR(state->latin1[i]);
}

if (_Py_IsMainInterpreter(tstate)) {
unicode_clear_static_strings();
}
unicode_clear_identifiers(tstate);

_PyUnicode_FiniEncodings(&tstate->interp->unicode.fs_codec);
}
Expand Down
30 changes: 19 additions & 11 deletions Python/pystate.c
Original file line number Diff line number Diff line change
Expand Up @@ -73,18 +73,24 @@ _PyRuntimeState_Init_impl(_PyRuntimeState *runtime)

runtime->interpreters.mutex = PyThread_allocate_lock();
if (runtime->interpreters.mutex == NULL) {
return _PyStatus_ERR("Can't initialize threads for interpreter");
return _PyStatus_NO_MEMORY();
}
runtime->interpreters.next_id = -1;

runtime->xidregistry.mutex = PyThread_allocate_lock();
if (runtime->xidregistry.mutex == NULL) {
return _PyStatus_ERR("Can't initialize threads for cross-interpreter data registry");
return _PyStatus_NO_MEMORY();
}

// Set it to the ID of the main thread of the main interpreter.
runtime->main_thread = PyThread_get_thread_ident();

runtime->unicode_ids.lock = PyThread_allocate_lock();
if (runtime->unicode_ids.lock == NULL) {
return _PyStatus_NO_MEMORY();
}
runtime->unicode_ids.next_index = 0;

return _PyStatus_OK();
}

Expand All @@ -108,17 +114,17 @@ _PyRuntimeState_Fini(_PyRuntimeState *runtime)
/* Force the allocator used by _PyRuntimeState_Init(). */
PyMemAllocatorEx old_alloc;
_PyMem_SetDefaultAllocator(PYMEM_DOMAIN_RAW, &old_alloc);

if (runtime->interpreters.mutex != NULL) {
PyThread_free_lock(runtime->interpreters.mutex);
runtime->interpreters.mutex = NULL;
#define FREE_LOCK(LOCK) \
if (LOCK != NULL) { \
PyThread_free_lock(LOCK); \
LOCK = NULL; \
}

if (runtime->xidregistry.mutex != NULL) {
PyThread_free_lock(runtime->xidregistry.mutex);
runtime->xidregistry.mutex = NULL;
}
FREE_LOCK(runtime->interpreters.mutex);
FREE_LOCK(runtime->xidregistry.mutex);
FREE_LOCK(runtime->unicode_ids.lock);

#undef FREE_LOCK
PyMem_SetAllocator(PYMEM_DOMAIN_RAW, &old_alloc);
}

Expand All @@ -139,12 +145,14 @@ _PyRuntimeState_ReInitThreads(_PyRuntimeState *runtime)
int reinit_interp = _PyThread_at_fork_reinit(&runtime->interpreters.mutex);
int reinit_main_id = _PyThread_at_fork_reinit(&runtime->interpreters.main->id_mutex);
int reinit_xidregistry = _PyThread_at_fork_reinit(&runtime->xidregistry.mutex);
int reinit_unicode_ids = _PyThread_at_fork_reinit(&runtime->unicode_ids.lock);

PyMem_SetAllocator(PYMEM_DOMAIN_RAW, &old_alloc);

if (reinit_interp < 0
|| reinit_main_id < 0
|| reinit_xidregistry < 0)
|| reinit_xidregistry < 0
|| reinit_unicode_ids < 0)
{
return _PyStatus_ERR("Failed to reinitialize runtime locks");

Expand Down