Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

gh-100227: Make the Global PyModuleDef Cache Safe for Isolated Interpreters #102938

Closed
Changes from all commits
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
546b934
Factor out add_threadstate().
ericsnowcurrently Mar 21, 2023
0bcd136
Add _PyThreadState_InitDetached().
ericsnowcurrently Mar 21, 2023
35d5310
Add _PyThreadState_ClearDetached().
ericsnowcurrently Mar 21, 2023
cba9e34
Add _PyRuntime.cached_objects.main_tstate.
ericsnowcurrently Mar 21, 2023
3b1bb8b
Add _Py_AcquireGlobalObjectsState() and _Py_ReleaseGlobalObjectsState().
ericsnowcurrently Mar 22, 2023
eb42aa1
Add _Py_AddToGlobalDict().
ericsnowcurrently Mar 22, 2023
4e9da2d
Drop _Py_AcquireGlobalObjectsState() and _Py_ReleaseGlobalObjectsStat…
ericsnowcurrently Mar 22, 2023
6216207
Add acquire_global_objects_lock() and release_global_objects_lock().
ericsnowcurrently Mar 22, 2023
3c007c0
Add some TODO comments.
ericsnowcurrently Mar 13, 2023
7d95514
Factor out store_interned().
ericsnowcurrently Mar 20, 2023
5c20b84
Store a thread state to use just for interned strings.
ericsnowcurrently Mar 20, 2023
a3ae02a
Always use the main interpreter when possibly resizing the interned d…
ericsnowcurrently Mar 20, 2023
d5fbc37
Use _PyRuntime.cached_objects.main_tstate instead.
ericsnowcurrently Mar 21, 2023
459325f
Add _PyThreadState_IsBound() and _PyThreadState_Unbind().
ericsnowcurrently Mar 21, 2023
4f25244
Make sure the one-off tstate is bound before using it.
ericsnowcurrently Mar 21, 2023
e68535a
Use _Py_AcquireGlobalObjectsState() in store_interned().
ericsnowcurrently Mar 22, 2023
22753b3
Use _Py_AddToGlobalDict().
ericsnowcurrently Mar 22, 2023
5c7bfd7
Move the extensions dict to _PyRuntime.cached_objects.
ericsnowcurrently Mar 22, 2023
f1a33ce
Add _Py_GetFromGlobalDict().
ericsnowcurrently Mar 22, 2023
10e6d69
Add _Py_PopFromGlobalDict().
ericsnowcurrently Mar 22, 2023
7dbae68
Adjust _Py_AddToGlobalDict().
ericsnowcurrently Mar 22, 2023
3e08c1f
Use _Py_PopFromGlobalDict() in unicode_dealloc().
ericsnowcurrently Mar 22, 2023
1039a60
Use the global dict API in import.c.
ericsnowcurrently Mar 22, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 14 additions & 0 deletions Include/internal/pycore_global_objects.h
Original file line number Diff line number Diff line change
@@ -27,7 +27,21 @@ extern "C" {
_PyRuntime.cached_objects.NAME

/* Objects cached on the global runtime state.
   Fields are reached as _PyRuntime.cached_objects.NAME. */
struct _Py_cached_objects {
/* A thread state tied to the main interpreter,
used exclusively for when a global object (e.g. interned strings)
is resized (i.e. deallocated + allocated) from an arbitrary thread.
It is embedded by value here, not allocated separately. */
PyThreadState main_tstate;

/* The dict of interned strings. */
PyObject *interned_strings;

/* A dict mapping (filename, name) to PyModuleDef for modules.
Only legacy (single-phase init) extension modules are added
and only if they support multiple initialization (m_size >= 0)
or are imported in the main interpreter.
This is initialized lazily in _PyImport_FixupExtensionObject().
Modules are added there and looked up in _imp.find_extension(). */
PyObject *extensions;
};

#define _Py_GLOBAL_OBJECT(NAME) \
9 changes: 2 additions & 7 deletions Include/internal/pycore_import.h
Original file line number Diff line number Diff line change
@@ -14,15 +14,10 @@ struct _import_runtime_state {
which is just about every time an extension module is imported.
See PyInterpreterState.modules_by_index for more info. */
Py_ssize_t last_module_index;
/* A dict mapping (filename, name) to PyModuleDef for modules.
Only legacy (single-phase init) extension modules are added
and only if they support multiple initialization (m_size >- 0)
or are imported in the main interpreter.
This is initialized lazily in _PyImport_FixupExtensionObject().
Modules are added there and looked up in _imp.find_extension(). */
PyObject *extensions;
/* Package context -- the full module name for package imports */
const char * pkgcontext;
/* The dict of cached module defs is over at
_PyRuntime.cached_objects.extensions. */
};

struct _import_state {
7 changes: 7 additions & 0 deletions Include/internal/pycore_pystate.h
Original file line number Diff line number Diff line change
@@ -127,6 +127,13 @@ PyAPI_FUNC(void) _PyThreadState_Init(
PyThreadState *tstate);
PyAPI_FUNC(void) _PyThreadState_DeleteExcept(PyThreadState *tstate);

extern void _PyThreadState_InitDetached(PyThreadState *, PyInterpreterState *);
extern void _PyThreadState_ClearDetached(PyThreadState *);

extern PyObject * _Py_GetFromGlobalDict(PyObject *, PyObject *);
extern PyObject * _Py_AddToGlobalDict(PyObject *, PyObject *, PyObject *);
extern PyObject * _Py_PopFromGlobalDict(PyObject *, PyObject *);


static inline void
_PyThreadState_UpdateTracingState(PyThreadState *tstate)
3 changes: 3 additions & 0 deletions Include/internal/pycore_runtime_init.h
Original file line number Diff line number Diff line change
@@ -58,6 +58,9 @@ extern PyTypeObject _PyExc_MemoryError;
.types = { \
.next_version_tag = 1, \
}, \
.cached_objects = { \
.main_tstate = _PyThreadState_INIT, \
}, \
.static_objects = { \
.singletons = { \
.small_ints = _Py_small_ints_INIT, \
1 change: 1 addition & 0 deletions Include/internal/pycore_unicodeobject.h
Original file line number Diff line number Diff line change
@@ -34,6 +34,7 @@ struct _Py_unicode_runtime_ids {

struct _Py_unicode_runtime_state {
struct _Py_unicode_runtime_ids ids;
/* The interned dict is at _PyRuntime.cached_objects.interned_strings. */
};

/* fs_codec.encoding is initialized to NULL.
16 changes: 6 additions & 10 deletions Objects/unicodeobject.c
Original file line number Diff line number Diff line change
@@ -1534,7 +1534,8 @@ unicode_dealloc(PyObject *unicode)
PyDict_DelItem(). */
assert(Py_REFCNT(unicode) == 0);
Py_SET_REFCNT(unicode, 3);
if (PyDict_DelItem(interned, unicode) != 0) {
if (_Py_PopFromGlobalDict(interned, unicode) == NULL
&& PyErr_Occurred()) {
_PyErr_WriteUnraisableMsg("deletion of interned string failed",
NULL);
}
@@ -14609,16 +14610,11 @@ PyUnicode_InternInPlace(PyObject **p)
}

PyObject *interned = get_interned_dict();
assert(interned != NULL);

PyObject *t = PyDict_SetDefault(interned, s, s);
if (t == NULL) {
PyErr_Clear();
return;
}

PyObject *t = _Py_AddToGlobalDict(interned, s, s);
if (t != s) {
Py_SETREF(*p, Py_NewRef(t));
if (t != NULL) {
Py_SETREF(*p, Py_NewRef(t));
}
return;
}

25 changes: 14 additions & 11 deletions Python/import.c
Original file line number Diff line number Diff line change
@@ -54,7 +54,7 @@ static struct _inittab *inittab_copy = NULL;

#define INITTAB _PyRuntime.imports.inittab
#define LAST_MODULE_INDEX _PyRuntime.imports.last_module_index
#define EXTENSIONS _PyRuntime.imports.extensions
#define EXTENSIONS _Py_CACHED_OBJECT(extensions)

#define PKGCONTEXT (_PyRuntime.imports.pkgcontext)

@@ -889,7 +889,7 @@ _extensions_cache_get(PyObject *filename, PyObject *name)
if (key == NULL) {
return NULL;
}
PyModuleDef *def = (PyModuleDef *)PyDict_GetItemWithError(extensions, key);
PyModuleDef *def = (PyModuleDef *)_Py_GetFromGlobalDict(extensions, key);
Py_DECREF(key);
return def;
}
@@ -909,9 +909,15 @@ _extensions_cache_set(PyObject *filename, PyObject *name, PyModuleDef *def)
if (key == NULL) {
return -1;
}
int res = PyDict_SetItem(extensions, key, (PyObject *)def);
PyObject *existing = _Py_AddToGlobalDict(extensions, key, (PyObject *)def);
Py_DECREF(key);
if (res < 0) {
if (existing == NULL) {
return -1;
}
if (existing != (PyObject *)def) {
assert(!PyErr_Occurred());
PyErr_Format(PyExc_ImportError,
"could not add moduledef for %s to the cache", name);
return -1;
}
return 0;
@@ -928,14 +934,11 @@ _extensions_cache_delete(PyObject *filename, PyObject *name)
if (key == NULL) {
return -1;
}
if (PyDict_DelItem(extensions, key) < 0) {
if (!PyErr_ExceptionMatches(PyExc_KeyError)) {
Py_DECREF(key);
return -1;
}
PyErr_Clear();
}
PyObject *value = _Py_PopFromGlobalDict(extensions, key);
Py_DECREF(key);
if (value == NULL && PyErr_Occurred()) {
return -1;
}
return 0;
}

4 changes: 4 additions & 0 deletions Python/pylifecycle.c
Original file line number Diff line number Diff line change
@@ -635,6 +635,8 @@ pycore_create_interpreter(_PyRuntimeState *runtime,
return status;
}

_PyThreadState_InitDetached(&runtime->cached_objects.main_tstate, interp);

*tstate_p = tstate;
return _PyStatus_OK();
}
@@ -1928,6 +1930,8 @@ Py_FinalizeEx(void)
// XXX Do this sooner during finalization.
// XXX Ensure finalizer errors are handled properly.

_PyThreadState_ClearDetached(&runtime->cached_objects.main_tstate);

finalize_interp_clear(tstate);
finalize_interp_delete(tstate->interp);

290 changes: 269 additions & 21 deletions Python/pystate.c
Original file line number Diff line number Diff line change
@@ -565,6 +565,210 @@ _PyRuntimeState_ReInitThreads(_PyRuntimeState *runtime)
#endif


//---------------
// global objects
//---------------

/* The global objects thread state is meant to be used in a very limited
way and should not be used to actually run any Python code. */

/* Bind the runtime's dedicated "global objects" thread state
   (_PyRuntime.cached_objects.main_tstate) and hand it back to the caller.

   Only bind_tstate() is applied; in contrast to _PyThreadState_Bind(),
   the gilstate TSS is left untouched. */
static PyThreadState *
bind_global_objects_state(_PyRuntimeState *runtime)
{
    PyThreadState *global_ts = &runtime->cached_objects.main_tstate;
    bind_tstate(global_ts);
    return global_ts;
}

/* Undo bind_global_objects_state() and scrub the thread state so that
   it can be bound again later.

   The thread state must be alive, must not be active, and must not be
   the one recorded in the gilstate TSS. */
static void
unbind_global_objects_state(_PyRuntimeState *runtime)
{
    PyThreadState *global_ts = &runtime->cached_objects.main_tstate;

    assert(tstate_is_alive(global_ts));
    assert(!global_ts->_status.active);
    assert(gilstate_tss_get(runtime) != global_ts);

    unbind_tstate(global_ts);

    /* Detach completely from the OS thread; otherwise the thread state
       could not be bound a second time. */
    global_ts->thread_id = 0;
#ifdef PY_HAVE_THREAD_NATIVE_ID
    global_ts->native_thread_id = 0;
#endif

    /* Because this thread state is bound and unbound over and over,
       wipe both status flags so it looks as if neither ever happened. */
    global_ts->_status.bound = 0;
    global_ts->_status.unbound = 0;
}

/* Mark the start of a critical section over the runtime-global objects.
   No lock is actually taken; the only effect is an assertion that the
   calling thread has a current thread state.
   Pair with release_global_objects_lock(). */
static inline void
acquire_global_objects_lock(_PyRuntimeState *runtime)
{
/* For now we can rely on the GIL, so we don't actually
acquire a global lock here. */
assert(current_fast_get(runtime) != NULL);
}

/* Mark the end of a critical section started with
   acquire_global_objects_lock().
   No lock is actually released; the only effect is an assertion that the
   calling thread (still) has a current thread state. */
static inline void
release_global_objects_lock(_PyRuntimeState *runtime)
{
/* For now we can rely on the GIL, so we don't actually
release a global lock here. */
assert(current_fast_get(runtime) != NULL);
}

/* Look up `key` in one of the runtime-global dicts.

   `dict` must be a non-NULL exact dict.  The result is exactly what
   PyDict_GetItemWithError(dict, key) produces, obtained while the
   global objects "lock" is held. */
PyObject *
_Py_GetFromGlobalDict(PyObject *dict, PyObject *key)
{
    assert(dict != NULL);
    assert(PyDict_CheckExact(dict));

    _PyRuntimeState *rt = &_PyRuntime;
    PyObject *found;

    /* Interpreter isolation requires a global lock around the lookup.
       The work done under the lock is narrowly focused, so reentrancy
       is not expected.  Swapping to the global objects thread state is
       unnecessary because a lookup performs no memory operations. */
    acquire_global_objects_lock(rt);
    found = PyDict_GetItemWithError(dict, key);
    release_global_objects_lock(rt);

    return found;
}

/* Add (key, value) to one of the runtime-global dicts, unless `key` is
   already present.

   Returns the value associated with `key` afterwards: the pre-existing
   value if there was one, otherwise `value`.  No new reference is created
   for the result.

   Returns NULL on failure:
     - with an exception set, if the initial lookup failed;
     - with NO exception set, if the insertion failed (the error is cleared
       on purpose, see below).
   Callers must therefore not assume an exception is pending on NULL.

   The insertion is performed with the main interpreter's dedicated
   thread state current, swapping to it (and back) when the caller is
   running in a different interpreter.  `dict` must be a non-NULL exact dict
   and the caller must have a current thread state. */
PyObject *
_Py_AddToGlobalDict(PyObject *dict, PyObject *key, PyObject *value)
{
    assert(dict != NULL);
    assert(PyDict_CheckExact(dict));

    /* All global objects are stored in _PyRuntime
       and owned by the main interpreter. */
    _PyRuntimeState *runtime = &_PyRuntime;
    PyThreadState *curts = current_fast_get(runtime);
    /* Check the thread state itself before dereferencing it; otherwise
       a missing thread state crashes before any assertion can report it. */
    assert(curts != NULL);  // The GIL must be held.
    PyInterpreterState *interp = curts->interp;
    assert(interp != NULL);

    /* Due to interpreter isolation we must hold a global lock,
       starting at this point and ending before we return.
       Note that the operations in this function are very focused
       and we should not expect any reentrancy. */
    acquire_global_objects_lock(runtime);

    /* We only swap interpreters if necessary. */
    PyObject *actual = PyDict_GetItemWithError(dict, key);
    if (actual == NULL && !PyErr_Occurred()) {
        /* Swap to the main interpreter, if necessary. */
        PyThreadState *oldts = NULL;
        if (!_Py_IsMainInterpreter(interp)) {
            PyThreadState *main_tstate = bind_global_objects_state(runtime);

            oldts = _PyThreadState_Swap(runtime, main_tstate);
            assert(oldts != NULL);
            assert(!_Py_IsMainInterpreter(oldts->interp));

            /* The limitations of the global objects thread state apply
               from this point to the point we swap back to oldts. */
        }

        /* This might trigger a resize, which is why we must "acquire"
           the global object state.  Also note that PyDict_SetItem()
           must be compatible with our reentrancy and global objects state
           constraints. */
        if (PyDict_SetItem(dict, key, value) < 0) {
            /* Raising an exception from one interpreter in another
               is problematic, so we clear it and let the caller deal
               with the returned NULL. */
            assert(PyErr_ExceptionMatches(PyExc_MemoryError));
            PyErr_Clear();
        }
        else {
            actual = value;
        }

        /* Swap back, if it wasn't in the main interpreter already. */
        if (oldts != NULL) {
            // The returned tstate should be _PyRuntime.cached_objects.main_tstate.
            _PyThreadState_Swap(runtime, oldts);

            unbind_global_objects_state(runtime);
        }
    }

    release_global_objects_lock(runtime);

    // XXX Immortalize the key and value.

    return actual;
}

/* Remove `key` from one of the runtime-global dicts.

   Returns the value that was found for `key`, or NULL if the key was
   absent (no exception set) or the lookup failed (exception set).

   WARNING: the result is the borrowed pointer obtained from the lookup
   *before* the entry was deleted.  Deleting the entry drops the dict's
   reference, so unless the caller otherwise keeps the value alive the
   pointer may refer to an already-deallocated object.  Use the result
   only as a "was it present?" flag (compare against NULL); do not
   dereference it without holding your own reference.

   If the deletion itself unexpectedly fails, debug builds assert; in
   other builds the error is cleared and the looked-up value is still
   returned.

   The deletion is performed with the main interpreter's dedicated
   thread state current, swapping to it (and back) when the caller is
   running in a different interpreter.  `dict` must be a non-NULL exact dict
   and the caller must have a current thread state. */
PyObject *
_Py_PopFromGlobalDict(PyObject *dict, PyObject *key)
{
    assert(dict != NULL);
    assert(PyDict_CheckExact(dict));

    /* All global objects are stored in _PyRuntime
       and owned by the main interpreter. */
    _PyRuntimeState *runtime = &_PyRuntime;
    PyThreadState *curts = current_fast_get(runtime);
    /* Check the thread state itself before dereferencing it; otherwise
       a missing thread state crashes before any assertion can report it. */
    assert(curts != NULL);  // The GIL must be held.
    PyInterpreterState *interp = curts->interp;
    assert(interp != NULL);

    /* Due to interpreter isolation we must hold a global lock,
       starting at this point and ending before we return.
       Note that the operations in this function are very focused
       and we should not expect any reentrancy. */
    acquire_global_objects_lock(runtime);

    /* We only swap interpreters if necessary. */
    PyObject *value = PyDict_GetItemWithError(dict, key);
    if (value != NULL) {
        /* Swap to the main interpreter, if necessary. */
        PyThreadState *oldts = NULL;
        if (!_Py_IsMainInterpreter(interp)) {
            PyThreadState *main_tstate = bind_global_objects_state(runtime);

            oldts = _PyThreadState_Swap(runtime, main_tstate);
            assert(oldts != NULL);
            assert(!_Py_IsMainInterpreter(oldts->interp));

            /* The limitations of the global objects thread state apply
               from this point to the point we swap back to oldts. */
        }

        /* This might trigger a resize, which is why we must "acquire"
           the global object state.  Also note that PyDict_DelItem()
           must be compatible with our reentrancy and global objects state
           constraints. */
        int res = PyDict_DelItem(dict, key);
        assert(res == 0);
        if (res < 0) {
            /* This really shouldn't happen. */
            PyErr_Clear();
        }

        /* Swap back, if it wasn't in the main interpreter already. */
        if (oldts != NULL) {
            // The returned tstate should be _PyRuntime.cached_objects.main_tstate.
            _PyThreadState_Swap(runtime, oldts);

            unbind_global_objects_state(runtime);
        }
    }

    release_global_objects_lock(runtime);

    return value;
}


/*************************************/
/* the per-interpreter runtime state */
/*************************************/
@@ -1217,8 +1421,7 @@ free_threadstate(PyThreadState *tstate)

static void
init_threadstate(PyThreadState *tstate,
PyInterpreterState *interp, uint64_t id,
PyThreadState *next)
PyInterpreterState *interp, uint64_t id)
{
if (tstate->_status.initialized) {
Py_FatalError("thread state already initialized");
@@ -1227,18 +1430,13 @@ init_threadstate(PyThreadState *tstate,
assert(interp != NULL);
tstate->interp = interp;

// next/prev are set in add_threadstate().
assert(tstate->next == NULL);
assert(tstate->prev == NULL);

assert(id > 0);
tstate->id = id;

assert(interp->threads.head == tstate);
assert((next != NULL && id != 1) || (next == NULL && id == 1));
if (next != NULL) {
assert(next->prev == NULL || next->prev == tstate);
next->prev = tstate;
}
tstate->next = next;
assert(tstate->prev == NULL);

// thread_id and native_thread_id are set in bind_tstate().

tstate->py_recursion_limit = interp->ceval.recursion_limit,
@@ -1259,6 +1457,22 @@ init_threadstate(PyThreadState *tstate,
tstate->_status.initialized = 1;
}

/* Push `tstate` onto the front of the interpreter's list of thread
   states, ahead of `next` (the previous head, or NULL).

   `tstate` must not already be the head and must have no predecessor. */
static void
add_threadstate(PyInterpreterState *interp, PyThreadState *tstate,
                PyThreadState *next)
{
    assert(interp->threads.head != tstate);
    /* There is no successor exactly when this is thread state #1. */
    assert((next == NULL) == (tstate->id == 1));

    tstate->next = next;
    if (next != NULL) {
        assert(next->prev == NULL || next->prev == tstate);
        next->prev = tstate;
    }

    assert(tstate->prev == NULL);
    interp->threads.head = tstate;
}

static PyThreadState *
new_threadstate(PyInterpreterState *interp)
{
@@ -1298,9 +1512,9 @@ new_threadstate(PyInterpreterState *interp)
&initial._main_interpreter._initial_thread,
sizeof(*tstate));
}
interp->threads.head = tstate;

init_threadstate(tstate, interp, id, old_head);
init_threadstate(tstate, interp, id);
add_threadstate(interp, tstate, old_head);

HEAD_UNLOCK(runtime);
if (!used_newtstate) {
@@ -1347,6 +1561,33 @@ _PyThreadState_Init(PyThreadState *tstate)
Py_FatalError("_PyThreadState_Init() is for internal use only");
}

/* Initialize `tstate` for `interp` without linking it into the
   interpreter's list of thread states.

   A fresh unique id is reserved under the runtime's HEAD lock; the
   initialization itself happens after the lock has been dropped. */
void
_PyThreadState_InitDetached(PyThreadState *tstate, PyInterpreterState *interp)
{
    _PyRuntimeState *rt = interp->runtime;
    uint64_t new_id;

    HEAD_LOCK(rt);
    new_id = ++interp->threads.next_unique_id;
    HEAD_UNLOCK(rt);

    /* Deliberately no add_threadstate() call: the thread state stays
       out of interp->threads. */
    init_threadstate(tstate, interp, new_id);
}


/* Release every chunk of the thread state's data stack.

   The list is detached from `tstate` first, then walked through the
   `previous` links, freeing each chunk with _PyObject_VirtualFree(). */
static void
clear_datastack(PyThreadState *tstate)
{
    _PyStackChunk *cur = tstate->datastack_chunk;
    tstate->datastack_chunk = NULL;

    for (_PyStackChunk *older; cur != NULL; cur = older) {
        /* Read the link before the chunk's memory goes away. */
        older = cur->previous;
        _PyObject_VirtualFree(cur, cur->size);
    }
}

void
PyThreadState_Clear(PyThreadState *tstate)
{
@@ -1421,7 +1662,6 @@ PyThreadState_Clear(PyThreadState *tstate)
// XXX Do it as early in the function as possible.
}


/* Common code for PyThreadState_Delete() and PyThreadState_DeleteCurrent() */
static void
tstate_delete_common(PyThreadState *tstate)
@@ -1454,17 +1694,25 @@ tstate_delete_common(PyThreadState *tstate)
unbind_tstate(tstate);

// XXX Move to PyThreadState_Clear()?
_PyStackChunk *chunk = tstate->datastack_chunk;
tstate->datastack_chunk = NULL;
while (chunk != NULL) {
_PyStackChunk *prev = chunk->previous;
_PyObject_VirtualFree(chunk, chunk->size);
chunk = prev;
}
clear_datastack(tstate);

tstate->_status.finalized = 1;
}

/* Clear a thread state that is not linked into an interpreter's list of
   thread states (see the next/prev assertions below).

   The thread state is expected to be fully unbound: not bound to a
   thread or to the gilstate, with no thread ids recorded, no data stack
   and no list links.  It is cleared with PyThreadState_Clear() and then
   any data stack chunks are released. */
void
_PyThreadState_ClearDetached(PyThreadState *tstate)
{
assert(!tstate->_status.bound);
assert(!tstate->_status.bound_gilstate);
assert(tstate->datastack_chunk == NULL);
assert(tstate->thread_id == 0);
assert(tstate->native_thread_id == 0);
assert(tstate->next == NULL);
assert(tstate->prev == NULL);

PyThreadState_Clear(tstate);
/* NOTE(review): datastack_chunk is asserted NULL above, so this only
   frees something if PyThreadState_Clear() left chunks behind. */
clear_datastack(tstate);
}

static void
zapthreads(PyInterpreterState *interp)