Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

gh-117953: Always Run Extension Init Func in Main Interpreter First #118157

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 7 additions & 11 deletions Lib/test/test_import/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2454,10 +2454,6 @@ def setUpClass(cls):
# Start fresh.
cls.clean_up()

@classmethod
def tearDownClass(cls):
restore__testsinglephase()

def tearDown(self):
# Clean up the module.
self.clean_up()
Expand Down Expand Up @@ -3014,20 +3010,20 @@ def test_basic_multiple_interpreters_deleted_no_reset(self):
# * alive in 0 interpreters
# * module def in _PyRuntime.imports.extensions
# * mod init func ran for the first time (since reset)
# * m_copy is NULL (claered when the interpreter was destroyed)
# * m_copy is still set (owned by main interpreter)
# * module's global state was initialized, not reset

# Use a subinterpreter that sticks around.
loaded_interp1 = self.import_in_subinterp(interpid1)
self.check_common(loaded_interp1)
self.check_semi_fresh(loaded_interp1, loaded_main, base)
self.check_copied(loaded_interp1, base)

# At this point:
# * alive in 1 interpreter (interp1)
# * module def still in _PyRuntime.imports.extensions
# * mod init func ran for the second time (since reset)
# * m_copy was copied from interp1 (was NULL)
# * module's global state was updated, not reset
# * mod init func did not run again
# * m_copy was not changed
# * module's global state was not touched

# Use a subinterpreter while the previous one is still alive.
loaded_interp2 = self.import_in_subinterp(interpid2)
Expand All @@ -3038,8 +3034,8 @@ def test_basic_multiple_interpreters_deleted_no_reset(self):
# * alive in 2 interpreters (interp1, interp2)
# * module def still in _PyRuntime.imports.extensions
# * mod init func did not run again
# * m_copy was copied from interp2 (was from interp1)
# * module's global state was updated, not reset
# * m_copy was not changed
# * module's global state was not touched

@requires_subinterpreters
def test_basic_multiple_interpreters_reset_each(self):
Expand Down
7 changes: 4 additions & 3 deletions Lib/test/test_interpreters/__init__.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import os
from test.support import load_package_tests
from test.support import load_package_tests, Py_GIL_DISABLED

def load_tests(*args):
return load_package_tests(os.path.dirname(__file__), *args)
if not Py_GIL_DISABLED:
def load_tests(*args):
return load_package_tests(os.path.dirname(__file__), *args)
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
When a builtin or extension module is imported for the first time, while a
subinterpreter is active, the module's init function is now run by the main
interpreter first before import continues in the subinterpreter.
Consequently, single-phase init modules now fail in an isolated
subinterpreter without the init function running under that interpreter,
whereas before it would run under the subinterpreter *before* failing,
potentially leaving behind global state and callbacks and otherwise leaving
the module in an inconsistent state.
252 changes: 197 additions & 55 deletions Python/import.c
Original file line number Diff line number Diff line change
Expand Up @@ -1154,9 +1154,6 @@ init_cached_m_dict(struct extensions_cache_value *value, PyObject *m_dict)
return -1;
}
// XXX We may want to make the copy immortal.
// XXX This incref shouldn't be necessary. We are decref'ing
// one to many times somewhere.
Py_INCREF(copied);

value->_m_dict = (struct cached_m_dict){
.copied=copied,
Expand Down Expand Up @@ -1580,6 +1577,26 @@ _PyImport_CheckGILForModule(PyObject* module, PyObject *module_name)
}
#endif

static PyThreadState *
switch_to_main_interpreter(PyThreadState *tstate)
{
if (_Py_IsMainInterpreter(tstate->interp)) {
return tstate;
}
PyThreadState *main_tstate = PyThreadState_New(_PyInterpreterState_Main());
if (main_tstate == NULL) {
return NULL;
}
main_tstate->_whence = _PyThreadState_WHENCE_EXEC;
#ifndef NDEBUG
PyThreadState *old_tstate = PyThreadState_Swap(main_tstate);
assert(old_tstate == tstate);
#else
(void)PyThreadState_Swap(main_tstate);
#endif
return main_tstate;
}

static PyObject *
get_core_module_dict(PyInterpreterState *interp,
PyObject *name, PyObject *path)
Expand Down Expand Up @@ -1936,24 +1953,171 @@ import_run_extension(PyThreadState *tstate, PyModInitFunction p0,
struct extensions_cache_value *cached = NULL;
const char *name_buf = PyBytes_AS_STRING(info->name_encoded);

/* We cannot know if the module is single-phase init or
* multi-phase init until after we call its init function. Even
* in isolated interpreters (that do not support single-phase init),
* the init function will run without restriction. For multi-phase
* init modules that isn't a problem because the init function only
* runs PyModuleDef_Init() on the module's def and then returns it.
*
* However, for single-phase init the module's init function will
* create the module, create other objects (and allocate other
* memory), populate it and its module state, and initialze static
* types. Some modules store other objects and data in global C
* variables and register callbacks with the runtime/stdlib or
* even external libraries (which is part of why we can't just
* dlclose() the module in the error case). That's a problem
* for isolated interpreters since all of the above happens
* and only then * will the import fail. Memory will leak,
* callbacks will still get used, and sometimes there
* will be crashes (memory access violations
* and use-after-free).
*
* To put it another way, if the module is single-phase init
* then the import will probably break interpreter isolation
* and should fail ASAP. However, the module's init function
* will still get run. That means it may still store state
* in the shared-object/DLL address space (which never gets
* closed/cleared), including objects (e.g. static types).
* This is a problem for isolated subinterpreters since each
* has its own object allocator. If the loaded shared-object
* still holds a reference to an object after the corresponding
* interpreter has finalized then either we must let it leak
* or else any later use of that object by another interpreter
* (or across multiple init-fini cycles) will crash the process.
*
* To avoid all of that, we make sure the module's init function
* is always run first with the main interpreter active. If it was
* already the main interpreter then we can continue loading the
* module like normal. Otherwise, right after the init function,
* we take care of some import state bookkeeping, switch back
* to the subinterpreter, check for single-phase init,
* and then continue loading like normal. */

bool switched = false;
/* We *could* leave in place a legacy interpreter here
* (one that shares obmalloc/GIL with main interp),
* but there isn't a big advantage, we anticipate
* such interpreters will be increasingly uncommon,
* and the code is a bit simpler if we always switch
* to the main interpreter. */
PyThreadState *main_tstate = switch_to_main_interpreter(tstate);
if (main_tstate == NULL) {
return NULL;
}
else if (main_tstate != tstate) {
switched = true;
/* In the switched case, we could play it safe
* by getting the main interpreter's import lock here.
* It's unlikely to matter though. */
}

struct _Py_ext_module_loader_result res;
if (_PyImport_RunModInitFunc(p0, info, &res) < 0) {
int rc = _PyImport_RunModInitFunc(p0, info, &res);
if (rc < 0) {
/* We discard res.def. */
assert(res.module == NULL);
_Py_ext_module_loader_result_apply_error(&res, name_buf);
return NULL;
}
assert(!PyErr_Occurred());
assert(res.err == NULL);
else {
assert(!PyErr_Occurred());
assert(res.err == NULL);

mod = res.module;
res.module = NULL;
def = res.def;
assert(def != NULL);

/* Do anything else that should be done
* while still using the main interpreter. */
if (res.kind == _Py_ext_module_kind_SINGLEPHASE) {
/* Remember the filename as the __file__ attribute */
if (info->filename != NULL) {
// XXX There's a refleak somewhere with the filename.
// Until we can track it down, we intern it.
PyObject *filename = Py_NewRef(info->filename);
PyUnicode_InternInPlace(&filename);
if (PyModule_AddObjectRef(mod, "__file__", filename) < 0) {
PyErr_Clear(); /* Not important enough to report */
}
}

/* Update global import state. */
assert(def->m_base.m_index != 0);
struct singlephase_global_update singlephase = {
// XXX Modules that share a def should each get their own index,
// whereas currently they share (which means the per-interpreter
// cache is less reliable than it should be).
.m_index=def->m_base.m_index,
.origin=info->origin,
#ifdef Py_GIL_DISABLED
.md_gil=((PyModuleObject *)mod)->md_gil,
#endif
};
// gh-88216: Extensions and def->m_base.m_copy can be updated
// when the extension module doesn't support sub-interpreters.
if (def->m_size == -1) {
/* We will reload from m_copy. */
assert(def->m_base.m_init == NULL);
singlephase.m_dict = PyModule_GetDict(mod);
assert(singlephase.m_dict != NULL);
}
else {
/* We will reload via the init function. */
assert(def->m_size >= 0);
assert(def->m_base.m_copy == NULL);
singlephase.m_init = p0;
}
cached = update_global_state_for_extension(
tstate, info->path, info->name, def, &singlephase);
if (cached == NULL) {
assert(PyErr_Occurred());
goto main_finally;
}
}
}

mod = res.module;
res.module = NULL;
def = res.def;
assert(def != NULL);
main_finally:
/* Switch back to the subinterpreter. */
if (switched) {
assert(main_tstate != tstate);

/* Handle any exceptions, which we cannot propagate directly
* to the subinterpreter. */
if (PyErr_Occurred()) {
if (PyErr_ExceptionMatches(PyExc_MemoryError)) {
/* We trust it will be caught again soon. */
PyErr_Clear();
}
else {
/* Printing the exception should be sufficient. */
PyErr_PrintEx(0);
}
}

/* Any module we got from the init function will have to be
* reloaded in the subinterpreter. */
Py_CLEAR(mod);

PyThreadState_Clear(main_tstate);
(void)PyThreadState_Swap(tstate);
PyThreadState_Delete(main_tstate);
}

/*****************************************************************/
/* At this point we are back to the interpreter we started with. */
/*****************************************************************/

/* Finally we handle the error return from _PyImport_RunModInitFunc(). */
if (rc < 0) {
_Py_ext_module_loader_result_apply_error(&res, name_buf);
goto error;
}

if (res.kind == _Py_ext_module_kind_MULTIPHASE) {
assert_multiphase_def(def);
assert(mod == NULL);
/* Note that we cheat a little by not repeating the calls
* to _PyImport_GetModInitFunc() and _PyImport_RunModInitFunc(). */
mod = PyModule_FromDefAndSpec(def, spec);
if (mod == NULL) {
goto error;
Expand All @@ -1962,57 +2126,35 @@ import_run_extension(PyThreadState *tstate, PyModInitFunction p0,
else {
assert(res.kind == _Py_ext_module_kind_SINGLEPHASE);
assert_singlephase_def(def);
assert(PyModule_Check(mod));

if (_PyImport_CheckSubinterpIncompatibleExtensionAllowed(name_buf) < 0) {
goto error;
}
assert(!PyErr_Occurred());

/* Remember the filename as the __file__ attribute */
if (info->filename != NULL) {
if (PyModule_AddObjectRef(mod, "__file__", info->filename) < 0) {
PyErr_Clear(); /* Not important enough to report */
if (switched) {
/* We switched to the main interpreter to run the init
* function, so now we will "reload" the module from the
* cached data using the original subinterpreter. */
assert(mod == NULL);
mod = reload_singlephase_extension(tstate, cached, info);
if (mod == NULL) {
goto error;
}
}

/* Update global import state. */
assert(def->m_base.m_index != 0);
struct singlephase_global_update singlephase = {
// XXX Modules that share a def should each get their own index,
// whereas currently they share (which means the per-interpreter
// cache is less reliable than it should be).
.m_index=def->m_base.m_index,
.origin=info->origin,
#ifdef Py_GIL_DISABLED
.md_gil=((PyModuleObject *)mod)->md_gil,
#endif
};
// gh-88216: Extensions and def->m_base.m_copy can be updated
// when the extension module doesn't support sub-interpreters.
if (def->m_size == -1) {
/* We will reload from m_copy. */
assert(def->m_base.m_init == NULL);
singlephase.m_dict = PyModule_GetDict(mod);
assert(singlephase.m_dict != NULL);
assert(!PyErr_Occurred());
assert(PyModule_Check(mod));
}
else {
/* We will reload via the init function. */
assert(def->m_size >= 0);
assert(def->m_base.m_copy == NULL);
singlephase.m_init = p0;
}
cached = update_global_state_for_extension(
tstate, info->path, info->name, def, &singlephase);
if (cached == NULL) {
goto error;
}

/* Update per-interpreter import state. */
PyObject *modules = get_modules_dict(tstate, true);
if (finish_singlephase_extension(
tstate, mod, cached, info->name, modules) < 0)
{
goto error;
assert(mod != NULL);
assert(PyModule_Check(mod));

/* Update per-interpreter import state. */
PyObject *modules = get_modules_dict(tstate, true);
if (finish_singlephase_extension(
tstate, mod, cached, info->name, modules) < 0)
{
goto error;
}
}
}

Expand Down
Loading