From a1a3b4c103860796c8dfae89fdf08dcae6a2857e Mon Sep 17 00:00:00 2001 From: Eric Snow Date: Wed, 7 Feb 2024 20:30:42 -0700 Subject: [PATCH 01/17] Load extension modules under the main interpreter first. --- Python/import.c | 121 ++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 111 insertions(+), 10 deletions(-) diff --git a/Python/import.c b/Python/import.c index 33277073ac543d..2755b73959c66c 100644 --- a/Python/import.c +++ b/Python/import.c @@ -1526,6 +1526,48 @@ _PyImport_CheckSubinterpIncompatibleExtensionAllowed(const char *name) return 0; } +static PyThreadState * +maybe_switch_to_main_interpreter(PyThreadState *tstate) +{ + PyThreadState *main_tstate = tstate; + if (check_multi_interp_extensions(tstate->interp)) { + /* + If the module is single-phase init then the import + will fail. However, the module's init function will still + get run. That means it may still store state in the + shared-object/DLL address space (which never gets + closed/cleared), including objects (e.g. static types). + + This is a problem for isolated subinterpreters since each + has its own object allocator. If the loaded shared-object + still holds a reference to an object after the corresponding + interpreter has finalized then either we must let it leak + or else any later use of that object by another interpreter + (or across multiple init-fini cycles) will crash the process. + + We avoid the problem by first loading the module in the main + interpreter. + + Here's another complication: the module's init function might + register callbacks, whether in Python (e.g. sys.stdin, atexit) + or in linked libraries. Thus we cannot just dlclose() the + module in this error case. + */ + main_tstate = PyThreadState_New(_PyInterpreterState_Main()); + if (main_tstate == NULL) { + return NULL; + } + main_tstate->_whence = _PyThreadState_WHENCE_EXEC; +#ifndef NDEBUG + PyThreadState *old_tstate = PyThreadState_Swap(main_tstate); + assert(old_tstate == tstate); +#else + (void)PyThreadState_Swap(main_tstate); +#endif + } + return main_tstate; +} + static PyObject * get_core_module_dict(PyInterpreterState *interp, PyObject *name, PyObject *path) @@ -1874,20 +1916,66 @@ import_run_extension(PyThreadState *tstate, PyModInitFunction p0, struct extensions_cache_value *cached = NULL; const char *name_buf = PyBytes_AS_STRING(info->name_encoded); + /* We cannot know if the module is single-phase init or + * multi-phase init until after we call its init function. Even + * in isolated interpreters (that do not support single-phase init), + * the init function will run without restriction. For multi-phase + * init modules that isn't a problem because the init function runs + * PyModuleDef_Init() on the module's def and then returns it. + * + * However, for single-phase init the module's init function will + * create the module, create other objects (and allocate other + * memory), populate it and its module state, and initialze static + * types. Some modules store other objects and data in global C + * variables and register callbacks with the runtime/stdlib or + * event external libraries. That's a problem for isolated + * interpreters since all of that happens and only then will + * the import fail. Memory will leak, callbacks will still + * get used, and sometimes there will be memory access + * violations and use-after-free crashes. + * + * To avoid that, we make sure the module's init function is always + * run first with the main interpreter active. If it was already + * the main interpreter then we can continue loading the module + * like normal. Otherwise, right after the init function, we switch + * back to the subinterpreter, check for single-phase init, and + * then continue loading like normal. */ + PyThreadState *main_tstate = maybe_switch_to_main_interpreter(tstate); + struct _Py_ext_module_loader_result res; - if (_PyImport_RunModInitFunc(p0, info, &res) < 0) { + int rc = _PyImport_RunModInitFunc(p0, info, &res); + if (rc < 0) { /* We discard res.def. */ assert(res.module == NULL); - _Py_ext_module_loader_result_apply_error(&res, name_buf); - return NULL; } - assert(!PyErr_Occurred()); - assert(res.err == NULL); + else { + assert(!PyErr_Occurred()); + assert(res.err == NULL); - mod = res.module; - res.module = NULL; - def = res.def; - assert(def != NULL); + mod = res.module; + res.module = NULL; + def = res.def; + assert(def != NULL); + } + + /* Switch back to the subinterpreter. */ + if (main_tstate != tstate) { + /* Any module we got from the init function will have to be + * reloaded in the subinterpreter. */ + Py_CLEAR(mod); + + (void)PyThreadState_Swap(tstate); + } + + /*****************************************************************/ + /* At this point we are back to the interpreter we started with. */ + /*****************************************************************/ + + /* Finally we handle the error return from _PyImport_RunModInitFunc(). */ + if (rc < 0) { + _Py_ext_module_loader_result_apply_error(&res, name_buf); + goto error; + } if (res.kind == _Py_ext_module_kind_MULTIPHASE) { assert_multiphase_def(def); @@ -1900,11 +1988,12 @@ import_run_extension(PyThreadState *tstate, PyModInitFunction p0, else { assert(res.kind == _Py_ext_module_kind_SINGLEPHASE); assert_singlephase_def(def); - assert(PyModule_Check(mod)); if (_PyImport_CheckSubinterpIncompatibleExtensionAllowed(name_buf) < 0) { goto error; } + assert(mod != NULL); + assert(PyModule_Check(mod)); /* Remember the filename as the __file__ attribute */ if (info->filename != NULL) { @@ -1942,6 +2031,18 @@ import_run_extension(PyThreadState *tstate, PyModInitFunction p0, goto error; } + if (main_tstate != tstate) { + /* We switched to the main interpreter to run the init + * function, so now we will "reload" the module from the + * cached data using the original subinterpreter. */ + Py_CLEAR(mod); + mod = reload_singlephase_extension(tstate, cached, info); + if (mod == NULL) { + goto error; + } + } + assert(!PyErr_Occurred()); + /* Update per-interpreter import state. */ PyObject *modules = get_modules_dict(tstate, true); if (finish_singlephase_extension( From 9d630f1da9d8462d1b6e0628534d2ac40927d2d6 Mon Sep 17 00:00:00 2001 From: Eric Snow Date: Tue, 16 Apr 2024 11:19:48 -0600 Subject: [PATCH 02/17] Tweak a comment. --- Python/import.c | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/Python/import.c b/Python/import.c index 2755b73959c66c..0fd6ab6bd702f7 100644 --- a/Python/import.c +++ b/Python/import.c @@ -1532,11 +1532,11 @@ maybe_switch_to_main_interpreter(PyThreadState *tstate) PyThreadState *main_tstate = tstate; if (check_multi_interp_extensions(tstate->interp)) { /* - If the module is single-phase init then the import - will fail. However, the module's init function will still - get run. That means it may still store state in the - shared-object/DLL address space (which never gets - closed/cleared), including objects (e.g. static types). + If the module is single-phase init then the import will fail. + However, the module's init function will still get run. + That means it may still store state in the shared-object/DLL + address space (which never gets closed/cleared), including + objects (e.g. static types). This is a problem for isolated subinterpreters since each has its own object allocator. If the loaded shared-object @@ -1545,13 +1545,14 @@ maybe_switch_to_main_interpreter(PyThreadState *tstate) or else any later use of that object by another interpreter (or across multiple init-fini cycles) will crash the process. - We avoid the problem by first loading the module in the main - interpreter. + We avoid the problem by first loading the module + in the main interpreter. - Here's another complication: the module's init function might - register callbacks, whether in Python (e.g. sys.stdin, atexit) - or in linked libraries. Thus we cannot just dlclose() the - module in this error case. + Here's another complication we avoid: the module's init + function might register callbacks, whether in Python + (e.g. sys.stdin, atexit) or in linked libraries. + Thus we cannot just dlclose() the module + in this error case. */ main_tstate = PyThreadState_New(_PyInterpreterState_Main()); if (main_tstate == NULL) { From 2d7ee11ba96a3a218c3ae15652b3cda14f7d18c8 Mon Sep 17 00:00:00 2001 From: Eric Snow Date: Tue, 16 Apr 2024 11:31:19 -0600 Subject: [PATCH 03/17] Do not switch if already on the main interpreter. --- Python/import.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/Python/import.c b/Python/import.c index 0fd6ab6bd702f7..3b96bcfc1434ff 100644 --- a/Python/import.c +++ b/Python/import.c @@ -1530,7 +1530,10 @@ static PyThreadState * maybe_switch_to_main_interpreter(PyThreadState *tstate) { PyThreadState *main_tstate = tstate; - if (check_multi_interp_extensions(tstate->interp)) { + if (_Py_IsMainInterpreter(tstate->interp)) { + /* There's no need to switch. */ + } + else if (check_multi_interp_extensions(tstate->interp)) { /* If the module is single-phase init then the import will fail. However, the module's init function will still get run. From f9e132f1b82e90d1c9d998737fda097690d9af87 Mon Sep 17 00:00:00 2001 From: Eric Snow Date: Wed, 17 Apr 2024 11:12:32 -0600 Subject: [PATCH 04/17] maybe_switch_to_main_interpreter() -> switch_to_main_interpreter() --- Python/import.c | 109 ++++++++++++++++++++++++++---------------------- 1 file changed, 58 insertions(+), 51 deletions(-) diff --git a/Python/import.c b/Python/import.c index 3b96bcfc1434ff..ec1f1f20a52b24 100644 --- a/Python/import.c +++ b/Python/import.c @@ -1527,48 +1527,22 @@ _PyImport_CheckSubinterpIncompatibleExtensionAllowed(const char *name) } static PyThreadState * -maybe_switch_to_main_interpreter(PyThreadState *tstate) +switch_to_main_interpreter(PyThreadState *tstate) { - PyThreadState *main_tstate = tstate; if (_Py_IsMainInterpreter(tstate->interp)) { - /* There's no need to switch. */ - } - else if (check_multi_interp_extensions(tstate->interp)) { - /* - If the module is single-phase init then the import will fail. - However, the module's init function will still get run. - That means it may still store state in the shared-object/DLL - address space (which never gets closed/cleared), including - objects (e.g. static types). - - This is a problem for isolated subinterpreters since each - has its own object allocator. If the loaded shared-object - still holds a reference to an object after the corresponding - interpreter has finalized then either we must let it leak - or else any later use of that object by another interpreter - (or across multiple init-fini cycles) will crash the process. - - We avoid the problem by first loading the module - in the main interpreter. - - Here's another complication we avoid: the module's init - function might register callbacks, whether in Python - (e.g. sys.stdin, atexit) or in linked libraries. - Thus we cannot just dlclose() the module - in this error case. - */ - main_tstate = PyThreadState_New(_PyInterpreterState_Main()); - if (main_tstate == NULL) { - return NULL; - } - main_tstate->_whence = _PyThreadState_WHENCE_EXEC; + return tstate; + } + PyThreadState *main_tstate = PyThreadState_New(_PyInterpreterState_Main()); + if (main_tstate == NULL) { + return NULL; + } + main_tstate->_whence = _PyThreadState_WHENCE_EXEC; #ifndef NDEBUG - PyThreadState *old_tstate = PyThreadState_Swap(main_tstate); - assert(old_tstate == tstate); + PyThreadState *old_tstate = PyThreadState_Swap(main_tstate); + assert(old_tstate == tstate); #else - (void)PyThreadState_Swap(main_tstate); + (void)PyThreadState_Swap(main_tstate); #endif - } return main_tstate; } @@ -1924,27 +1898,58 @@ import_run_extension(PyThreadState *tstate, PyModInitFunction p0, * multi-phase init until after we call its init function. Even * in isolated interpreters (that do not support single-phase init), * the init function will run without restriction. For multi-phase - * init modules that isn't a problem because the init function runs - * PyModuleDef_Init() on the module's def and then returns it. + * init modules that isn't a problem because the init function only + * runs PyModuleDef_Init() on the module's def and then returns it. * * However, for single-phase init the module's init function will * create the module, create other objects (and allocate other * memory), populate it and its module state, and initialze static * types. Some modules store other objects and data in global C * variables and register callbacks with the runtime/stdlib or - * event external libraries. That's a problem for isolated - * interpreters since all of that happens and only then will - * the import fail. Memory will leak, callbacks will still - * get used, and sometimes there will be memory access - * violations and use-after-free crashes. + * even external libraries (which is part of why we can't just + * dlclose() the module in the error case). That's a problem + * for isolated interpreters since all of the above happens + * and only then * will the import fail. Memory will leak, + * callbacks will still get used, and sometimes there + * will be crashes (memory access violations + * and use-after-free). + * + * To put it another way, if the module is single-phase init + * then the import will probably break interpreter isolation + * and should fail ASAP. However, the module's init function + * will still get run. That means it may still store state + * in the shared-object/DLL address space (which never gets + * closed/cleared), including objects (e.g. static types). + * This is a problem for isolated subinterpreters since each + * has its own object allocator. If the loaded shared-object + * still holds a reference to an object after the corresponding + * interpreter has finalized then either we must let it leak + * or else any later use of that object by another interpreter + * (or across multiple init-fini cycles) will crash the process. * - * To avoid that, we make sure the module's init function is always - * run first with the main interpreter active. If it was already - * the main interpreter then we can continue loading the module - * like normal. Otherwise, right after the init function, we switch - * back to the subinterpreter, check for single-phase init, and - * then continue loading like normal. */ - PyThreadState *main_tstate = maybe_switch_to_main_interpreter(tstate); + * To avoid all of that, we make sure the module's init function + * is always run first with the main interpreter active. If it was + * already the main interpreter then we can continue loading the + * module like normal. Otherwise, right after the init function, + * we take care of some import state bookkeeping, switch back + * to the subinterpreter, check for single-phase init, + * and then continue loading like normal. */ + + PyThreadState *main_tstate = NULL; + if (!_Py_IsMainInterpreter(tstate->interp)) { + /* We *could* leave in place a legacy interpreter here + * (one that shares obmalloc/GIL with main interp), + * but there isn't a big advantage, we anticipate + * such interpreters will be increasingly uncommon, + * and the code is a bit simpler if we always switch + * to the main interpreter. */ + main_tstate = switch_to_main_interpreter(tstate); + if (main_tstate == NULL) { + return NULL; + } + assert(main_tstate != tstate); + // XXX Get import lock. + } struct _Py_ext_module_loader_result res; int rc = _PyImport_RunModInitFunc(p0, info, &res); @@ -1984,6 +1989,8 @@ import_run_extension(PyThreadState *tstate, PyModInitFunction p0, if (res.kind == _Py_ext_module_kind_MULTIPHASE) { assert_multiphase_def(def); assert(mod == NULL); + /* Note that we cheat a little by not repeating the calls + * to _PyImport_GetModInitFunc() and _PyImport_RunModInitFunc(). */ mod = PyModule_FromDefAndSpec(def, spec); if (mod == NULL) { goto error; From b09c44538e0ad63bbfe03c400477a5d5a4ebf84c Mon Sep 17 00:00:00 2001 From: Eric Snow Date: Mon, 6 May 2024 10:38:40 -0600 Subject: [PATCH 05/17] Update the global cache while still under the main interpreter. --- Lib/test/test_import/__init__.py | 14 ++--- Python/import.c | 102 +++++++++++++++++-------------- 2 files changed, 62 insertions(+), 54 deletions(-) diff --git a/Lib/test/test_import/__init__.py b/Lib/test/test_import/__init__.py index b2aae774a12205..903a6d54d137ba 100644 --- a/Lib/test/test_import/__init__.py +++ b/Lib/test/test_import/__init__.py @@ -3014,20 +3014,20 @@ def test_basic_multiple_interpreters_deleted_no_reset(self): # * alive in 0 interpreters # * module def in _PyRuntime.imports.extensions # * mod init func ran for the first time (since reset) - # * m_copy is NULL (claered when the interpreter was destroyed) + # * m_copy is still set (owned by main interpreter) # * module's global state was initialized, not reset # Use a subinterpreter that sticks around. loaded_interp1 = self.import_in_subinterp(interpid1) self.check_common(loaded_interp1) - self.check_semi_fresh(loaded_interp1, loaded_main, base) + self.check_copied(loaded_interp1, base) # At this point: # * alive in 1 interpreter (interp1) # * module def still in _PyRuntime.imports.extensions - # * mod init func ran for the second time (since reset) - # * m_copy was copied from interp1 (was NULL) - # * module's global state was updated, not reset + # * mod init func did not run again + # * m_copy was not changed + # * module's global state was not touched # Use a subinterpreter while the previous one is still alive. loaded_interp2 = self.import_in_subinterp(interpid2) @@ -3038,8 +3038,8 @@ def test_basic_multiple_interpreters_deleted_no_reset(self): # * alive in 2 interpreters (interp1, interp2) # * module def still in _PyRuntime.imports.extensions # * mod init func did not run again - # * m_copy was copied from interp2 (was from interp1) - # * module's global state was updated, not reset + # * m_copy was not changed + # * module's global state was not touched @requires_subinterpreters def test_basic_multiple_interpreters_reset_each(self): diff --git a/Python/import.c b/Python/import.c index ec1f1f20a52b24..12cde8e9b0ba2e 100644 --- a/Python/import.c +++ b/Python/import.c @@ -1967,6 +1967,46 @@ import_run_extension(PyThreadState *tstate, PyModInitFunction p0, assert(def != NULL); } + /* Do anything else that should be done + * while still using the main interpreter. */ + if (res.kind == _Py_ext_module_kind_SINGLEPHASE) { + /* Remember the filename as the __file__ attribute */ + if (info->filename != NULL) { + if (PyModule_AddObjectRef(mod, "__file__", info->filename) < 0) { + PyErr_Clear(); /* Not important enough to report */ + } + } + + /* Update global import state. */ + assert(def->m_base.m_index != 0); + struct singlephase_global_update singlephase = { + // XXX Modules that share a def should each get their own index, + // whereas currently they share (which means the per-interpreter + // cache is less reliable than it should be). + .m_index=def->m_base.m_index, + .origin=info->origin, + }; + // gh-88216: Extensions and def->m_base.m_copy can be updated + // when the extension module doesn't support sub-interpreters. + if (def->m_size == -1) { + /* We will reload from m_copy. */ + assert(def->m_base.m_init == NULL); + singlephase.m_dict = PyModule_GetDict(mod); + assert(singlephase.m_dict != NULL); + } + else { + /* We will reload via the init function. */ + assert(def->m_size >= 0); + assert(def->m_base.m_copy == NULL); + singlephase.m_init = p0; + } + cached = update_global_state_for_extension( + tstate, info->path, info->name, def, &singlephase); + if (cached == NULL) { + goto error; + } + } + /* Switch back to the subinterpreter. */ if (main_tstate != tstate) { /* Any module we got from the init function will have to be @@ -2003,63 +2043,31 @@ import_run_extension(PyThreadState *tstate, PyModInitFunction p0, if (_PyImport_CheckSubinterpIncompatibleExtensionAllowed(name_buf) < 0) { goto error; } - assert(mod != NULL); - assert(PyModule_Check(mod)); - - /* Remember the filename as the __file__ attribute */ - if (info->filename != NULL) { - if (PyModule_AddObjectRef(mod, "__file__", info->filename) < 0) { - PyErr_Clear(); /* Not important enough to report */ - } - } - - /* Update global import state. */ - assert(def->m_base.m_index != 0); - struct singlephase_global_update singlephase = { - // XXX Modules that share a def should each get their own index, - // whereas currently they share (which means the per-interpreter - // cache is less reliable than it should be). - .m_index=def->m_base.m_index, - .origin=info->origin, - }; - // gh-88216: Extensions and def->m_base.m_copy can be updated - // when the extension module doesn't support sub-interpreters. - if (def->m_size == -1) { - /* We will reload from m_copy. */ - assert(def->m_base.m_init == NULL); - singlephase.m_dict = PyModule_GetDict(mod); - assert(singlephase.m_dict != NULL); - } - else { - /* We will reload via the init function. */ - assert(def->m_size >= 0); - assert(def->m_base.m_copy == NULL); - singlephase.m_init = p0; - } - cached = update_global_state_for_extension( - tstate, info->path, info->name, def, &singlephase); - if (cached == NULL) { - goto error; - } + assert(!PyErr_Occurred()); if (main_tstate != tstate) { /* We switched to the main interpreter to run the init * function, so now we will "reload" the module from the * cached data using the original subinterpreter. */ - Py_CLEAR(mod); + assert(mod == NULL); mod = reload_singlephase_extension(tstate, cached, info); if (mod == NULL) { goto error; } + assert(!PyErr_Occurred()); + assert(PyModule_Check(mod)); } - assert(!PyErr_Occurred()); - - /* Update per-interpreter import state. */ - PyObject *modules = get_modules_dict(tstate, true); - if (finish_singlephase_extension( - tstate, mod, cached, info->name, modules) < 0) - { - goto error; + else { + assert(mod != NULL); + assert(PyModule_Check(mod)); + + /* Update per-interpreter import state. */ + PyObject *modules = get_modules_dict(tstate, true); + if (finish_singlephase_extension( + tstate, mod, cached, info->name, modules) < 0) + { + goto error; + } } } From 8ca803341bf343f08157f6edb38e118b762d06b6 Mon Sep 17 00:00:00 2001 From: Eric Snow Date: Mon, 6 May 2024 10:58:06 -0600 Subject: [PATCH 06/17] Add a NEWS entry. --- .../2024-05-06-10-57-54.gh-issue-117953.DqCzIs.rst | 8 ++++++++ 1 file changed, 8 insertions(+) create mode 100644 Misc/NEWS.d/next/Core and Builtins/2024-05-06-10-57-54.gh-issue-117953.DqCzIs.rst diff --git a/Misc/NEWS.d/next/Core and Builtins/2024-05-06-10-57-54.gh-issue-117953.DqCzIs.rst b/Misc/NEWS.d/next/Core and Builtins/2024-05-06-10-57-54.gh-issue-117953.DqCzIs.rst new file mode 100644 index 00000000000000..6b69c9311013bb --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2024-05-06-10-57-54.gh-issue-117953.DqCzIs.rst @@ -0,0 +1,8 @@ +When a builtin or extension module is imported for the first time, while a +subinterpreter is active, the module's init function is now run by the main +interpreter first before import continues in the subinterpreter. +Consequently, single-phase init modules now fail in an isolated +subinterpreter without the init function running under that interpreter, +whereas before it would run under the subinterpreter *before* failing, +potentially leaving behind global state and callbacks and otherwise leaving +the module in an inconsistent state. From 122761cb5d81417f555dee8554859b19b19bfd33 Mon Sep 17 00:00:00 2001 From: Eric Snow Date: Mon, 6 May 2024 11:38:31 -0600 Subject: [PATCH 07/17] Fix the main_tstate checks. --- Python/import.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/Python/import.c b/Python/import.c index 12cde8e9b0ba2e..8cf90460ae2f32 100644 --- a/Python/import.c +++ b/Python/import.c @@ -2008,7 +2008,9 @@ import_run_extension(PyThreadState *tstate, PyModInitFunction p0, } /* Switch back to the subinterpreter. */ - if (main_tstate != tstate) { + if (main_tstate != NULL) { + assert(main_tstate != tstate); + /* Any module we got from the init function will have to be * reloaded in the subinterpreter. */ Py_CLEAR(mod); @@ -2045,7 +2047,7 @@ import_run_extension(PyThreadState *tstate, PyModInitFunction p0, } assert(!PyErr_Occurred()); - if (main_tstate != tstate) { + if (main_tstate != NULL) { /* We switched to the main interpreter to run the init * function, so now we will "reload" the module from the * cached data using the original subinterpreter. */ From bb3402ad55f69786f2b8ed2e86c80a8655c0d08b Mon Sep 17 00:00:00 2001 From: Eric Snow Date: Mon, 6 May 2024 12:43:37 -0600 Subject: [PATCH 08/17] Switch back if there is an exception. --- Python/import.c | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/Python/import.c b/Python/import.c index 8cf90460ae2f32..617471eddc20f5 100644 --- a/Python/import.c +++ b/Python/import.c @@ -2003,14 +2003,28 @@ import_run_extension(PyThreadState *tstate, PyModInitFunction p0, cached = update_global_state_for_extension( tstate, info->path, info->name, def, &singlephase); if (cached == NULL) { - goto error; + goto main_finally; } } +main_finally: /* Switch back to the subinterpreter. */ if (main_tstate != NULL) { assert(main_tstate != tstate); + /* Handle any exceptions, which we cannot propagate directly + * to the subinterpreter. */ + if (PyErr_Occurred()) { + if (PyErr_ExceptionMatches(PyExc_MemoryError)) { + /* We trust it will be caught again soon. */ + PyErr_Clear(); + } + else { + /* Printing the exception should be sufficient. */ + PyErr_PrintEx(0); + } + } + /* Any module we got from the init function will have to be * reloaded in the subinterpreter. */ Py_CLEAR(mod); From 164a9eb25e26ff065438d8dcfd3b0c8579720c70 Mon Sep 17 00:00:00 2001 From: Eric Snow Date: Mon, 6 May 2024 12:51:13 -0600 Subject: [PATCH 09/17] Cache the module only on success. --- Python/import.c | 72 ++++++++++++++++++++++++------------------------- 1 file changed, 36 insertions(+), 36 deletions(-) diff --git a/Python/import.c b/Python/import.c index 617471eddc20f5..bb60b4cf019568 100644 --- a/Python/import.c +++ b/Python/import.c @@ -1965,45 +1965,45 @@ import_run_extension(PyThreadState *tstate, PyModInitFunction p0, res.module = NULL; def = res.def; assert(def != NULL); - } - /* Do anything else that should be done - * while still using the main interpreter. */ - if (res.kind == _Py_ext_module_kind_SINGLEPHASE) { - /* Remember the filename as the __file__ attribute */ - if (info->filename != NULL) { - if (PyModule_AddObjectRef(mod, "__file__", info->filename) < 0) { - PyErr_Clear(); /* Not important enough to report */ + /* Do anything else that should be done + * while still using the main interpreter. */ + if (res.kind == _Py_ext_module_kind_SINGLEPHASE) { + /* Remember the filename as the __file__ attribute */ + if (info->filename != NULL) { + if (PyModule_AddObjectRef(mod, "__file__", info->filename) < 0) { + PyErr_Clear(); /* Not important enough to report */ + } } - } - /* Update global import state. */ - assert(def->m_base.m_index != 0); - struct singlephase_global_update singlephase = { - // XXX Modules that share a def should each get their own index, - // whereas currently they share (which means the per-interpreter - // cache is less reliable than it should be). - .m_index=def->m_base.m_index, - .origin=info->origin, - }; - // gh-88216: Extensions and def->m_base.m_copy can be updated - // when the extension module doesn't support sub-interpreters. - if (def->m_size == -1) { - /* We will reload from m_copy. */ - assert(def->m_base.m_init == NULL); - singlephase.m_dict = PyModule_GetDict(mod); - assert(singlephase.m_dict != NULL); - } - else { - /* We will reload via the init function. */ - assert(def->m_size >= 0); - assert(def->m_base.m_copy == NULL); - singlephase.m_init = p0; - } - cached = update_global_state_for_extension( - tstate, info->path, info->name, def, &singlephase); - if (cached == NULL) { - goto main_finally; + /* Update global import state. */ + assert(def->m_base.m_index != 0); + struct singlephase_global_update singlephase = { + // XXX Modules that share a def should each get their own index, + // whereas currently they share (which means the per-interpreter + // cache is less reliable than it should be). + .m_index=def->m_base.m_index, + .origin=info->origin, + }; + // gh-88216: Extensions and def->m_base.m_copy can be updated + // when the extension module doesn't support sub-interpreters. + if (def->m_size == -1) { + /* We will reload from m_copy. */ + assert(def->m_base.m_init == NULL); + singlephase.m_dict = PyModule_GetDict(mod); + assert(singlephase.m_dict != NULL); + } + else { + /* We will reload via the init function. */ + assert(def->m_size >= 0); + assert(def->m_base.m_copy == NULL); + singlephase.m_init = p0; + } + cached = update_global_state_for_extension( + tstate, info->path, info->name, def, &singlephase); + if (cached == NULL) { + goto main_finally; + } } } From 0c74b25edf44a4da239b4ca81ec2f49655ac8093 Mon Sep 17 00:00:00 2001 From: Eric Snow Date: Mon, 6 May 2024 12:53:07 -0600 Subject: [PATCH 10/17] Add a missing assert. --- Python/import.c | 1 + 1 file changed, 1 insertion(+) diff --git a/Python/import.c b/Python/import.c index bb60b4cf019568..140cbc8bcd0939 100644 --- a/Python/import.c +++ b/Python/import.c @@ -2002,6 +2002,7 @@ import_run_extension(PyThreadState *tstate, PyModInitFunction p0, cached = update_global_state_for_extension( tstate, info->path, info->name, def, &singlephase); if (cached == NULL) { + assert(PyErr_Occurred()); goto main_finally; } } From 7b9a29d97f8b8af6128d75a2bbc186fefc366c62 Mon Sep 17 00:00:00 2001 From: Eric Snow Date: Mon, 6 May 2024 13:44:38 -0600 Subject: [PATCH 11/17] Clean up the temprary thread state. --- Python/import.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Python/import.c b/Python/import.c index 140cbc8bcd0939..7719c04ddb3362 100644 --- a/Python/import.c +++ b/Python/import.c @@ -2030,7 +2030,9 @@ import_run_extension(PyThreadState *tstate, PyModInitFunction p0, * reloaded in the subinterpreter. */ Py_CLEAR(mod); + PyThreadState_Clear(main_tstate); (void)PyThreadState_Swap(tstate); + PyThreadState_Delete(main_tstate); } /*****************************************************************/ From 7965d57f87de671649ab073d1bea8dd553e7500c Mon Sep 17 00:00:00 2001 From: Eric Snow Date: Mon, 6 May 2024 13:54:45 -0600 Subject: [PATCH 12/17] Do not do an extra _Py_IsMainInterpreter() check. --- Python/import.c | 34 ++++++++++++++++++---------------- 1 file changed, 18 insertions(+), 16 deletions(-) diff --git a/Python/import.c b/Python/import.c index 7719c04ddb3362..dd17c653c69d93 100644 --- a/Python/import.c +++ b/Python/import.c @@ -1935,20 +1935,22 @@ import_run_extension(PyThreadState *tstate, PyModInitFunction p0, * to the subinterpreter, check for single-phase init, * and then continue loading like normal. */ - PyThreadState *main_tstate = NULL; - if (!_Py_IsMainInterpreter(tstate->interp)) { - /* We *could* leave in place a legacy interpreter here - * (one that shares obmalloc/GIL with main interp), - * but there isn't a big advantage, we anticipate - * such interpreters will be increasingly uncommon, - * and the code is a bit simpler if we always switch - * to the main interpreter. */ - main_tstate = switch_to_main_interpreter(tstate); - if (main_tstate == NULL) { - return NULL; - } - assert(main_tstate != tstate); - // XXX Get import lock. + bool switched = false; + /* We *could* leave in place a legacy interpreter here + * (one that shares obmalloc/GIL with main interp), + * but there isn't a big advantage, we anticipate + * such interpreters will be increasingly uncommon, + * and the code is a bit simpler if we always switch + * to the main interpreter. */ + PyThreadState *main_tstate = switch_to_main_interpreter(tstate); + if (main_tstate == NULL) { + return NULL; + } + else if (main_tstate != tstate) { + switched = true; + /* In the switched case, we could play it safe + * by getting the main interpreter's import lock here. + * It's unlikely to matter though. */ } struct _Py_ext_module_loader_result res; @@ -2010,7 +2012,7 @@ import_run_extension(PyThreadState *tstate, PyModInitFunction p0, main_finally: /* Switch back to the subinterpreter. */ - if (main_tstate != NULL) { + if (switched) { assert(main_tstate != tstate); /* Handle any exceptions, which we cannot propagate directly @@ -2064,7 +2066,7 @@ import_run_extension(PyThreadState *tstate, PyModInitFunction p0, } assert(!PyErr_Occurred()); - if (main_tstate != NULL) { + if (switched) { /* We switched to the main interpreter to run the init * function, so now we will "reload" the module from the * cached data using the original subinterpreter. */ From 2abce96d617bb92c983306b45195a999f2c52af9 Mon Sep 17 00:00:00 2001 From: Eric Snow Date: Mon, 6 May 2024 14:48:12 -0600 Subject: [PATCH 13/17] Drop the superfluous SinglephaseInitTests.tearDownClass(). --- Lib/test/test_import/__init__.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/Lib/test/test_import/__init__.py b/Lib/test/test_import/__init__.py index 903a6d54d137ba..64282d0f2d0bcf 100644 --- a/Lib/test/test_import/__init__.py +++ b/Lib/test/test_import/__init__.py @@ -2454,10 +2454,6 @@ def setUpClass(cls): # Start fresh. cls.clean_up() - @classmethod - def tearDownClass(cls): - restore__testsinglephase() - def tearDown(self): # Clean up the module. self.clean_up() From 0b16f3fb21d3c328259afe100e9a0ab762c646a6 Mon Sep 17 00:00:00 2001 From: Eric Snow Date: Mon, 6 May 2024 20:11:20 -0600 Subject: [PATCH 14/17] Work around a refleak. --- Python/import.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/Python/import.c b/Python/import.c index dd17c653c69d93..684595ce418731 100644 --- a/Python/import.c +++ b/Python/import.c @@ -1147,9 +1147,6 @@ init_cached_m_dict(struct extensions_cache_value *value, PyObject *m_dict) return -1; } // XXX We may want to make the copy immortal. - // XXX This incref shouldn't be necessary. We are decref'ing - // one to many times somewhere. - Py_INCREF(copied); value->_m_dict = (struct cached_m_dict){ .copied=copied, @@ -1973,7 +1970,11 @@ import_run_extension(PyThreadState *tstate, PyModInitFunction p0, if (res.kind == _Py_ext_module_kind_SINGLEPHASE) { /* Remember the filename as the __file__ attribute */ if (info->filename != NULL) { - if (PyModule_AddObjectRef(mod, "__file__", info->filename) < 0) { + // XXX There's a refleak somewhere with the filename. + // Until we can track it down, we intern it. + PyObject *filename = Py_NewRef(info->filename); + PyUnicode_InternInPlace(&filename); + if (PyModule_AddObjectRef(mod, "__file__", filename) < 0) { PyErr_Clear(); /* Not important enough to report */ } } From f7d0ff9e8595acdeb05556f36620f238f633b0e9 Mon Sep 17 00:00:00 2001 From: Eric Snow Date: Mon, 6 May 2024 21:15:45 -0600 Subject: [PATCH 15/17] Skip some tests when the GIL is disabled. --- Lib/test/test_interpreters/test_lifecycle.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Lib/test/test_interpreters/test_lifecycle.py b/Lib/test/test_interpreters/test_lifecycle.py index ac24f6568acd95..62d6e4b32bb556 100644 --- a/Lib/test/test_interpreters/test_lifecycle.py +++ b/Lib/test/test_interpreters/test_lifecycle.py @@ -115,6 +115,7 @@ def run_python(self, argv, *, cwd=None): self.assertEqual(proc.stderr, '') return proc.stdout + @requires_gil_enabled def test_sys_path_0(self): # The main interpreter's sys.path[0] should be used by subinterpreters. script = ''' @@ -164,6 +165,7 @@ def test_sys_path_0(self): class FinalizationTests(TestBase): + @requires_gil_enabled @support.requires_subprocess() def test_gh_109793(self): # Make sure finalization finishes and the correct error code From 048342f88bb1d1311eaa3b2edcc78d65417c1885 Mon Sep 17 00:00:00 2001 From: Eric Snow Date: Mon, 6 May 2024 21:25:44 -0600 Subject: [PATCH 16/17] Fix the tests. --- Lib/test/test_interpreters/test_lifecycle.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Lib/test/test_interpreters/test_lifecycle.py b/Lib/test/test_interpreters/test_lifecycle.py index 62d6e4b32bb556..bd0072283b6528 100644 --- a/Lib/test/test_interpreters/test_lifecycle.py +++ b/Lib/test/test_interpreters/test_lifecycle.py @@ -115,7 +115,7 @@ def run_python(self, argv, *, cwd=None): self.assertEqual(proc.stderr, '') return proc.stdout - @requires_gil_enabled + @support.requires_gil_enabled def test_sys_path_0(self): # The main interpreter's sys.path[0] should be used by subinterpreters. script = ''' @@ -165,7 +165,7 @@ def test_sys_path_0(self): class FinalizationTests(TestBase): - @requires_gil_enabled + @support.requires_gil_enabled @support.requires_subprocess() def test_gh_109793(self): # Make sure finalization finishes and the correct error code From 6d5311ec4854f45cff5b8cccc5ab5ef4a38c120c Mon Sep 17 00:00:00 2001 From: Eric Snow Date: Mon, 6 May 2024 21:40:58 -0600 Subject: [PATCH 17/17] Disable all interpreters tests under Py_GIL_DISABLED. --- Lib/test/test_interpreters/__init__.py | 7 ++++--- Lib/test/test_interpreters/test_lifecycle.py | 2 -- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/Lib/test/test_interpreters/__init__.py b/Lib/test/test_interpreters/__init__.py index 4b16ecc31156a5..52ff553f60d0d7 100644 --- a/Lib/test/test_interpreters/__init__.py +++ b/Lib/test/test_interpreters/__init__.py @@ -1,5 +1,6 @@ import os -from test.support import load_package_tests +from test.support import load_package_tests, Py_GIL_DISABLED -def load_tests(*args): - return load_package_tests(os.path.dirname(__file__), *args) +if not Py_GIL_DISABLED: + def load_tests(*args): + return load_package_tests(os.path.dirname(__file__), *args) diff --git a/Lib/test/test_interpreters/test_lifecycle.py b/Lib/test/test_interpreters/test_lifecycle.py index bd0072283b6528..ac24f6568acd95 100644 --- a/Lib/test/test_interpreters/test_lifecycle.py +++ b/Lib/test/test_interpreters/test_lifecycle.py @@ -115,7 +115,6 @@ def run_python(self, argv, *, cwd=None): self.assertEqual(proc.stderr, '') return proc.stdout - @support.requires_gil_enabled def test_sys_path_0(self): # The main interpreter's sys.path[0] should be used by subinterpreters. script = ''' @@ -165,7 +164,6 @@ def test_sys_path_0(self): class FinalizationTests(TestBase): - @support.requires_gil_enabled @support.requires_subprocess() def test_gh_109793(self): # Make sure finalization finishes and the correct error code