diff --git a/Include/internal/pycore_sysmodule.h b/Include/internal/pycore_sysmodule.h index 10d092cdc30a2c..79380f7172907e 100644 --- a/Include/internal/pycore_sysmodule.h +++ b/Include/internal/pycore_sysmodule.h @@ -8,6 +8,10 @@ extern "C" { # error "this header requires Py_BUILD_CORE define" #endif +#ifdef HAVE_FORK +extern PyStatus _PySys_ReInitStdio(void); +#endif + PyAPI_FUNC(int) _PySys_Audit( PyThreadState *tstate, const char *event, diff --git a/Lib/test/test_thread.py b/Lib/test/test_thread.py index d55fb731b6df54..18c1088becb211 100644 --- a/Lib/test/test_thread.py +++ b/Lib/test/test_thread.py @@ -1,4 +1,6 @@ +import io import os +import sys import unittest import random from test import support @@ -265,5 +267,107 @@ def tearDown(self): pass +class TestStdioAtForkReInit(unittest.TestCase): + + class MockStdio(io.TextIOWrapper): + def __init__(self): + import _io, tempfile + self._file = tempfile.mktemp() + raw = _io.FileIO(self._file, mode='wb') + buffer = _io.BufferedWriter(raw) + super().__init__(buffer) + + def __del__(self): + try: + self.close() + os.remove(self._file) + except: + pass + + def isatty(self): + # pretend to be a tty, so _PySys_ReInitStdio + # will attempt to reinitialize our buffer lock. + return True + + def setUp(self): + # Replace stdout & stderr with mock objects with the + # same underlying buffer type to avoid polluting the + # test output stream. + self._saved_stdout = sys.stdout + self._saved_stderr = sys.stderr + sys.stdout = self.MockStdio() + sys.stderr = self.MockStdio() + + def tearDown(self): + sys.stdout = self._saved_stdout + sys.stderr = self._saved_stderr + + @unittest.skipUnless(hasattr(os, 'fork'), 'need os.fork') + @threading_helper.reap_threads + def test_stdio_at_fork_reinit(self): + + # bpo-46210: Added _PySys_ReInitStdio to prevent children + # from deadlocking here if printer_thread held the stdout + # (or stderr) buffer's lock when one of the children forked. + + num_prints = 100 + num_forks = 100 + msg = 'hello'*10 + + def printer_thread(stdio): + for n in range(num_prints): + print(msg, file=stdio) + stdio.flush() + + def fork_processes(stdio): + pids = [] + for n in range(num_forks): + if pid := os.fork(): + pids.append(pid) + else: + print(msg, file=stdio) + stdio.flush() + os._exit(0) + + return pids + + def main(stdio): + thread.start_new_thread(printer_thread, (stdio,)) + pids = fork_processes(stdio) + for pid in pids: + support.wait_process(pid, exitcode=0) + return + + # Forking below is not necessary to illustrate the bug + # in bpo-46210 and its fix. The issue in bpo-46210 is + # that calling main(sys.stdout) or main(sys.stderr) + # is sufficient to cause a deadlock in print. We fork + # here only to allow us to give a single timeout to the + # main() call, since its failure mode (absent the fix) + # is for some subset of the forked child processes to + # deadlock at the moment when they try to print, rather + # than to raise an exception. Therefore, simply looping + # over the child pids and calling support.wait_process + # with a separate nonzero timeout for each child leads + # to a rather unpredictable total wait time, whereas + # forking again here at the top (though not necessary + # to illustrate the bug) allows us to give a predictable + # timeout to the process of waiting for the children. + # + # bpo-46210 is present if and only if one or more of the + # children forked by main() deadlock when they call print. + # + # pr-30310 proposes a fix following the example of the + # import lock, by providing a function _PySys_ReInitStdio + # that is called alongside the other preexisting lock + # reinitialization functions in PyOS_AfterFork_Child. + for stdio in (sys.stdout, sys.stderr): + with threading_helper.wait_threads_exit(): + if main_pid := os.fork(): + support.wait_process(main_pid, exitcode=0, timeout=5) + else: + main(stdio) + os._exit(0) + if __name__ == "__main__": unittest.main() diff --git a/Misc/NEWS.d/next/Core and Builtins/2021-12-30-15-50-39.bpo-46210.YRy67_.rst b/Misc/NEWS.d/next/Core and Builtins/2021-12-30-15-50-39.bpo-46210.YRy67_.rst new file mode 100644 index 00000000000000..c3ca26debf2a01 --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2021-12-30-15-50-39.bpo-46210.YRy67_.rst @@ -0,0 +1,2 @@ +Add :c:func:`_at_fork_reinit` handler to ``buffer->lock`` of stdout and +stderr, to fix a deadlock in print. diff --git a/Modules/_io/bufferedio.c b/Modules/_io/bufferedio.c index bb93ae361f8925..a5a37470d91bfc 100644 --- a/Modules/_io/bufferedio.c +++ b/Modules/_io/bufferedio.c @@ -414,6 +414,20 @@ buffered_clear(buffered *self) return 0; } +#ifdef HAVE_FORK +int +_PyIO_buffered_at_fork_reinit(PyObject *self) +{ + if (!Py_IS_TYPE(self, &PyBufferedWriter_Type) && + !Py_IS_TYPE(self, &PyBufferedReader_Type)) { + return 0; + } + buffered *buffer = (buffered *)self; + buffer->owner = 0; + return _PyThread_at_fork_reinit(&buffer->lock); +} +#endif + /* Because this can call arbitrary code, it shouldn't be called when the refcount is 0 (that is, not directly from tp_dealloc unless the refcount has been temporarily re-incremented). */ diff --git a/Modules/posixmodule.c b/Modules/posixmodule.c index 6ef3610fce7c5e..7f82f8482b7b3b 100644 --- a/Modules/posixmodule.c +++ b/Modules/posixmodule.c @@ -25,6 +25,7 @@ #include "pycore_ceval.h" // _PyEval_ReInitThreads() #include "pycore_fileutils.h" // _Py_closerange() #include "pycore_import.h" // _PyImport_ReInitLock() +#include "pycore_sysmodule.h" // _PySys_ReInitStdio() #include "pycore_initconfig.h" // _PyStatus_EXCEPTION() #include "pycore_moduleobject.h" // _PyModule_GetState() #include "pycore_object.h" // _PyObject_LookupSpecial() @@ -592,6 +593,11 @@ PyOS_AfterFork_Child(void) goto fatal_error; } + status = _PySys_ReInitStdio(); + if (_PyStatus_EXCEPTION(status)) { + goto fatal_error; + } + _PySignal_AfterFork(); status = _PyRuntimeState_ReInitThreads(runtime); diff --git a/Python/sysmodule.c b/Python/sysmodule.c index 57bf04dd306aee..aa0d2d5f03877b 100644 --- a/Python/sysmodule.c +++ b/Python/sysmodule.c @@ -2895,6 +2895,53 @@ _PySys_InitCore(PyThreadState *tstate, PyObject *sysdict) return _PyStatus_ERR("can't initialize sys module"); } +#ifdef HAVE_FORK +extern int _PyIO_buffered_at_fork_reinit(PyObject *); + +static int +stream_at_fork_reinit(PyObject *stream) +{ + if (stream == NULL || Py_IsNone(stream)) { + return 0; + } + + /* The buffer attribute is not part of the TextIOBase API + * and may not exist in some implementations. If not present, + * we have no locks to reinitialize. */ + PyObject *buffer = PyObject_GetAttr(stream, &_Py_ID(buffer)); + if (buffer == NULL) { + PyErr_Clear(); + return 0; + } + + /* Reinitialize buffer->lock */ + int ret = _PyIO_buffered_at_fork_reinit(buffer); + Py_DECREF(buffer); + return ret; +} + +/* This function is called from PyOS_AfterFork_Child() to ensure that newly + created child processes do not share locks with the parent. */ +PyStatus +_PySys_ReInitStdio(void) +{ + PyThreadState *tstate = _PyThreadState_GET(); + + PyObject *sys_stdin = _PySys_GetAttr(tstate, &_Py_ID(stdin)); + PyObject *sys_stdout = _PySys_GetAttr(tstate, &_Py_ID(stdout)); + PyObject *sys_stderr = _PySys_GetAttr(tstate, &_Py_ID(stderr)); + + int reinit_stdin = stream_at_fork_reinit(sys_stdin); + int reinit_stdout = stream_at_fork_reinit(sys_stdout); + int reinit_stderr = stream_at_fork_reinit(sys_stderr); + + if (reinit_stdin < 0 || reinit_stdout < 0 || reinit_stderr < 0) { + return _PyStatus_ERR("Failed to reinitialize standard streams"); + } + return _PyStatus_OK(); +} +#endif + static int sys_add_xoption(PyObject *opts, const wchar_t *s) {