Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

gh-100227: Make the Global Interned Dict Safe for Isolated Interpreters #102925

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
17 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions Include/internal/pycore_global_objects.h
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,10 @@ extern "C" {

/* Objects cached on the runtime as a whole (reached as
   runtime->cached_objects), rather than on any single interpreter. */
struct _Py_cached_objects {
/* The dict of interned strings. */
PyObject *interned_strings;
/* A thread state tied to the main interpreter,
used exclusively for when a global object (e.g. interned strings)
is resized (i.e. deallocated + allocated) from an arbitrary thread.
It is embedded here by value rather than referenced through a pointer. */
PyThreadState main_tstate;
};

#define _Py_GLOBAL_OBJECT(NAME) \
Expand Down
5 changes: 5 additions & 0 deletions Include/internal/pycore_pystate.h
Original file line number Diff line number Diff line change
Expand Up @@ -127,6 +127,11 @@ PyAPI_FUNC(void) _PyThreadState_Init(
PyThreadState *tstate);
PyAPI_FUNC(void) _PyThreadState_DeleteExcept(PyThreadState *tstate);

/* Initialize a caller-provided thread state for the given interpreter
   without linking it into that interpreter's list of thread states. */
extern void _PyThreadState_InitDetached(PyThreadState *, PyInterpreterState *);
/* Clear a detached thread state.  The thread state must not be bound to
   a thread, nor linked into a list of thread states, when this is called. */
extern void _PyThreadState_ClearDetached(PyThreadState *);

/* Add a key/value pair to one of the runtime's global dicts using
   PyDict_SetDefault() and return that call's result.  On failure NULL is
   returned and the exception is cleared rather than left set. */
extern PyObject * _Py_AddToGlobalDict(PyObject *, PyObject *, PyObject *);


static inline void
_PyThreadState_UpdateTracingState(PyThreadState *tstate)
Expand Down
3 changes: 3 additions & 0 deletions Include/internal/pycore_runtime_init.h
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,9 @@ extern PyTypeObject _PyExc_MemoryError;
.types = { \
.next_version_tag = 1, \
}, \
.cached_objects = { \
.main_tstate = _PyThreadState_INIT, \
}, \
.static_objects = { \
.singletons = { \
.small_ints = _Py_small_ints_INIT, \
Expand Down
1 change: 1 addition & 0 deletions Include/internal/pycore_unicodeobject.h
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ struct _Py_unicode_runtime_ids {

/* State of the unicode implementation; the only member visible here is
   the identifier bookkeeping in `ids`. */
struct _Py_unicode_runtime_state {
struct _Py_unicode_runtime_ids ids;
/* The interned dict is not a member of this struct;
   it is at _PyRuntime.cached_objects.interned_strings. */
};

/* fs_codec.encoding is initialized to NULL.
Expand Down
13 changes: 4 additions & 9 deletions Objects/unicodeobject.c
Original file line number Diff line number Diff line change
Expand Up @@ -14609,16 +14609,11 @@ PyUnicode_InternInPlace(PyObject **p)
}

PyObject *interned = get_interned_dict();
assert(interned != NULL);

PyObject *t = PyDict_SetDefault(interned, s, s);
if (t == NULL) {
PyErr_Clear();
return;
}

PyObject *t = _Py_AddToGlobalDict(interned, s, s);
if (t != s) {
Py_SETREF(*p, Py_NewRef(t));
if (t != NULL) {
Py_SETREF(*p, Py_NewRef(t));
}
return;
}

Expand Down
4 changes: 4 additions & 0 deletions Python/pylifecycle.c
Original file line number Diff line number Diff line change
Expand Up @@ -635,6 +635,8 @@ pycore_create_interpreter(_PyRuntimeState *runtime,
return status;
}

_PyThreadState_InitDetached(&runtime->cached_objects.main_tstate, interp);

*tstate_p = tstate;
return _PyStatus_OK();
}
Expand Down Expand Up @@ -1928,6 +1930,8 @@ Py_FinalizeEx(void)
// XXX Do this sooner during finalization.
// XXX Ensure finalizer errors are handled properly.

_PyThreadState_ClearDetached(&runtime->cached_objects.main_tstate);

finalize_interp_clear(tstate);
finalize_interp_delete(tstate->interp);

Expand Down
204 changes: 183 additions & 21 deletions Python/pystate.c
Original file line number Diff line number Diff line change
Expand Up @@ -565,6 +565,124 @@ _PyRuntimeState_ReInitThreads(_PyRuntimeState *runtime)
#endif


//---------------
// global objects
//---------------

/* The global objects thread state is meant to be used in a very limited
way and should not be used to actually run any Python code. */

/* Bind the runtime's global-objects thread state
   (runtime->cached_objects.main_tstate) with bind_tstate()
   and return a pointer to it. */
static PyThreadState *
bind_global_objects_state(_PyRuntimeState *runtime)
{
    PyThreadState *global_ts = &runtime->cached_objects.main_tstate;

    /* Only bind_tstate() is called: in contrast to _PyThreadState_Bind(),
       the gilstate TSS is deliberately left unmodified. */
    bind_tstate(global_ts);
    return global_ts;
}

/* Unbind the runtime's global-objects thread state
   (runtime->cached_objects.main_tstate) and reset it so that
   it can be bound again later. */
static void
unbind_global_objects_state(_PyRuntimeState *runtime)
{
    PyThreadState *main_tstate = &runtime->cached_objects.main_tstate;

    /* Preconditions: alive, not active, and not the gilstate TSS value. */
    assert(tstate_is_alive(main_tstate));
    assert(!main_tstate->_status.active);
    assert(gilstate_tss_get(runtime) != main_tstate);

    unbind_tstate(main_tstate);

    /* Fully detach the thread state from any OS thread;
       otherwise it could not be bound more than once. */
#ifdef PY_HAVE_THREAD_NATIVE_ID
    main_tstate->native_thread_id = 0;
#endif
    main_tstate->thread_id = 0;

    /* Because this thread state is bound and unbound repeatedly,
       wipe the record of it ever having been bound or unbound. */
    main_tstate->_status.unbound = 0;
    main_tstate->_status.bound = 0;
}

/* "Acquire" the lock that protects the runtime's global objects. */
static inline void
acquire_global_objects_lock(_PyRuntimeState *runtime)
{
    /* No dedicated global lock is taken yet: for now the GIL is relied
       upon instead, so we only check that there is a current
       thread state. */
    assert(current_fast_get(runtime) != NULL);
}

/* "Release" the lock that protects the runtime's global objects. */
static inline void
release_global_objects_lock(_PyRuntimeState *runtime)
{
    /* No dedicated global lock is dropped yet: for now the GIL is relied
       upon instead, so we only check that there is a current
       thread state. */
    assert(current_fast_get(runtime) != NULL);
}

/* Add `key` -> `value` to `dict`, one of the global dicts stored in
   _PyRuntime, unless `key` is already present.

   The insertion is done with PyDict_SetDefault().  When the caller is not
   running in the main interpreter, the current thread state is temporarily
   swapped for the runtime's global-objects thread state (which is tied to
   the main interpreter) for the duration of the dict operation, and then
   swapped back.

   Returns whatever PyDict_SetDefault() returned: the value now stored for
   `key` (a borrowed reference), or NULL on failure.  On failure the
   exception is cleared before returning, so the caller sees NULL with
   no exception set. */
PyObject *
_Py_AddToGlobalDict(PyObject *dict, PyObject *key, PyObject *value)
{
    assert(dict != NULL);
    assert(PyDict_CheckExact(dict));

    /* All global objects are stored in _PyRuntime
       and owned by the main interpreter. */
    _PyRuntimeState *runtime = &_PyRuntime;
    PyThreadState *curts = current_fast_get(runtime);
    /* The GIL must be held, i.e. there must be a current thread state.
       Check this before the thread state is dereferenced below. */
    assert(curts != NULL);
    PyInterpreterState *interp = curts->interp;
    assert(interp != NULL);

    /* Due to interpreter isolation we must hold a global lock,
       starting at this point and ending before we return.
       Note that the operations in this function are very focused
       and we should not expect any reentrancy. */
    acquire_global_objects_lock(runtime);

    /* Swap to the main interpreter, if necessary. */
    PyThreadState *oldts = NULL;
    if (!_Py_IsMainInterpreter(interp)) {
        PyThreadState *main_tstate = bind_global_objects_state(runtime);

        oldts = _PyThreadState_Swap(runtime, main_tstate);
        assert(oldts != NULL);
        assert(!_Py_IsMainInterpreter(oldts->interp));

        /* The limitations of the global objects thread state apply
           from this point to the point we swap back to oldts. */
    }

    /* This might trigger a resize, which is why we must "acquire"
       the global object state.  Also note that PyDict_SetDefault()
       must be compatible with our reentrancy and global objects state
       constraints. */
    PyObject *actual = PyDict_SetDefault(dict, key, value);
    if (actual == NULL) {
        /* Raising an exception from one interpreter in another
           is problematic, so we clear it and let the caller deal
           with the returned NULL. */
        assert(PyErr_ExceptionMatches(PyExc_MemoryError));
        PyErr_Clear();
    }

    /* Swap back, if it wasn't in the main interpreter already. */
    if (oldts != NULL) {
        // The returned tstate should be _PyRuntime.cached_objects.main_tstate.
        _PyThreadState_Swap(runtime, oldts);

        unbind_global_objects_state(runtime);
    }

    release_global_objects_lock(runtime);

    // XXX Immortalize the key and value.

    return actual;
}


/*************************************/
/* the per-interpreter runtime state */
/*************************************/
Expand Down Expand Up @@ -1217,8 +1335,7 @@ free_threadstate(PyThreadState *tstate)

static void
init_threadstate(PyThreadState *tstate,
PyInterpreterState *interp, uint64_t id,
PyThreadState *next)
PyInterpreterState *interp, uint64_t id)
{
if (tstate->_status.initialized) {
Py_FatalError("thread state already initialized");
Expand All @@ -1227,18 +1344,13 @@ init_threadstate(PyThreadState *tstate,
assert(interp != NULL);
tstate->interp = interp;

// next/prev are set in add_threadstate().
assert(tstate->next == NULL);
assert(tstate->prev == NULL);

assert(id > 0);
tstate->id = id;

assert(interp->threads.head == tstate);
assert((next != NULL && id != 1) || (next == NULL && id == 1));
if (next != NULL) {
assert(next->prev == NULL || next->prev == tstate);
next->prev = tstate;
}
tstate->next = next;
assert(tstate->prev == NULL);

// thread_id and native_thread_id are set in bind_tstate().

tstate->py_recursion_limit = interp->ceval.recursion_limit,
Expand All @@ -1259,6 +1371,22 @@ init_threadstate(PyThreadState *tstate,
tstate->_status.initialized = 1;
}

/* Link `tstate` in as the new head of `interp`'s list of thread states.
   `next` is the thread state that will follow it, or NULL when `tstate`
   is the first one (id 1). */
static void
add_threadstate(PyInterpreterState *interp, PyThreadState *tstate,
                PyThreadState *next)
{
    /* The new thread state must not already be the head, and only the
       thread state with id 1 may be added without a successor. */
    assert(interp->threads.head != tstate);
    assert((next != NULL && tstate->id != 1) ||
           (next == NULL && tstate->id == 1));

    /* Forward link first, then the successor's back link (if any). */
    tstate->next = next;
    if (next != NULL) {
        assert(next->prev == NULL || next->prev == tstate);
        next->prev = tstate;
    }

    /* As the new head, it has no predecessor. */
    assert(tstate->prev == NULL);
    interp->threads.head = tstate;
}

static PyThreadState *
new_threadstate(PyInterpreterState *interp)
{
Expand Down Expand Up @@ -1298,9 +1426,9 @@ new_threadstate(PyInterpreterState *interp)
&initial._main_interpreter._initial_thread,
sizeof(*tstate));
}
interp->threads.head = tstate;

init_threadstate(tstate, interp, id, old_head);
init_threadstate(tstate, interp, id);
add_threadstate(interp, tstate, old_head);

HEAD_UNLOCK(runtime);
if (!used_newtstate) {
Expand Down Expand Up @@ -1347,6 +1475,33 @@ _PyThreadState_Init(PyThreadState *tstate)
Py_FatalError("_PyThreadState_Init() is for internal use only");
}

/* Initialize the caller-provided `tstate` for `interp`, giving it a fresh
   unique id, without linking it into the interpreter's list of
   thread states. */
void
_PyThreadState_InitDetached(PyThreadState *tstate, PyInterpreterState *interp)
{
    _PyRuntimeState *runtime = interp->runtime;
    uint64_t new_id;

    /* Bump the interpreter's id counter and take the new value,
       all while the head lock is held. */
    HEAD_LOCK(runtime);
    new_id = ++interp->threads.next_unique_id;
    HEAD_UNLOCK(runtime);

    init_threadstate(tstate, interp, new_id);
    // add_threadstate() is intentionally not called here.
}


/* Free every chunk of `tstate`'s data stack and leave the thread state
   with no data stack chunk at all. */
static void
clear_datastack(PyThreadState *tstate)
{
    /* Detach the chain from the thread state before freeing it. */
    _PyStackChunk *remaining = tstate->datastack_chunk;
    tstate->datastack_chunk = NULL;

    /* Walk the chain through the `previous` links, releasing each chunk
       only after its predecessor has been fetched. */
    while (remaining != NULL) {
        _PyStackChunk *doomed = remaining;
        remaining = doomed->previous;
        _PyObject_VirtualFree(doomed, doomed->size);
    }
}

void
PyThreadState_Clear(PyThreadState *tstate)
{
Expand Down Expand Up @@ -1421,7 +1576,6 @@ PyThreadState_Clear(PyThreadState *tstate)
// XXX Do it as early in the function as possible.
}


/* Common code for PyThreadState_Delete() and PyThreadState_DeleteCurrent() */
static void
tstate_delete_common(PyThreadState *tstate)
Expand Down Expand Up @@ -1454,17 +1608,25 @@ tstate_delete_common(PyThreadState *tstate)
unbind_tstate(tstate);

// XXX Move to PyThreadState_Clear()?
_PyStackChunk *chunk = tstate->datastack_chunk;
tstate->datastack_chunk = NULL;
while (chunk != NULL) {
_PyStackChunk *prev = chunk->previous;
_PyObject_VirtualFree(chunk, chunk->size);
chunk = prev;
}
clear_datastack(tstate);

tstate->_status.finalized = 1;
}

/* Clear a detached thread state: run PyThreadState_Clear() on it and then
   release its data stack.  The asserts below spell out the state the
   thread state is expected to be in when this is called. */
void
_PyThreadState_ClearDetached(PyThreadState *tstate)
{
    /* Not bound in any way... */
    assert(!tstate->_status.bound);
    assert(!tstate->_status.bound_gilstate);
    /* ...with no data stack allocated... */
    assert(tstate->datastack_chunk == NULL);
    /* ...not associated with an OS thread... */
    assert(tstate->thread_id == 0);
    assert(tstate->native_thread_id == 0);
    /* ...and not linked into any list of thread states. */
    assert(tstate->next == NULL);
    assert(tstate->prev == NULL);

    PyThreadState_Clear(tstate);
    clear_datastack(tstate);
}

static void
zapthreads(PyInterpreterState *interp)
Expand Down