diff --git a/Include/cpython/critical_section.h b/Include/cpython/critical_section.h index 35db3fb6a59ce6..d4c3ec9c67e504 100644 --- a/Include/cpython/critical_section.h +++ b/Include/cpython/critical_section.h @@ -85,6 +85,7 @@ PyCriticalSection2_End(PyCriticalSection2 *c); #ifndef Py_GIL_DISABLED # define Py_BEGIN_CRITICAL_SECTION(op) \ { +# define Py_EXIT_CRITICAL_SECTION() # define Py_END_CRITICAL_SECTION() \ } # define Py_BEGIN_CRITICAL_SECTION2(a, b) \ @@ -118,6 +119,10 @@ struct PyCriticalSection2 { PyCriticalSection _py_cs; \ PyCriticalSection_Begin(&_py_cs, _PyObject_CAST(op)) +# define Py_EXIT_CRITICAL_SECTION() \ + _PyCriticalSection_End(&_py_cs); + + # define Py_END_CRITICAL_SECTION() \ PyCriticalSection_End(&_py_cs); \ } diff --git a/Include/internal/pycore_critical_section.h b/Include/internal/pycore_critical_section.h index 78cd0d54972660..f19780754edcc1 100644 --- a/Include/internal/pycore_critical_section.h +++ b/Include/internal/pycore_critical_section.h @@ -51,6 +51,11 @@ extern "C" { } \ } +# define Py_EXIT_CRITICAL_SECTION_SEQUENCE_FAST() \ + if (_should_lock_cs) { \ + _PyCriticalSection_End(&_cs); \ + } + // Asserts that the mutex is locked. The mutex must be held by the // top-most critical section otherwise there's the possibility // that the mutex would be swalled out in some code paths. @@ -78,6 +83,7 @@ extern "C" { # define Py_BEGIN_CRITICAL_SECTION_MUT(mut) { # define Py_BEGIN_CRITICAL_SECTION2_MUT(m1, m2) { # define Py_BEGIN_CRITICAL_SECTION_SEQUENCE_FAST(original) { +# define Py_EXIT_CRITICAL_SECTION_SEQUENCE_FAST() # define Py_END_CRITICAL_SECTION_SEQUENCE_FAST() } # define _Py_CRITICAL_SECTION_ASSERT_MUTEX_LOCKED(mutex) # define _Py_CRITICAL_SECTION_ASSERT_OBJECT_LOCKED(op) diff --git a/Lib/test/test_json/test_threading.py b/Lib/test/test_json/test_threading.py new file mode 100644 index 00000000000000..ab07cceceda289 --- /dev/null +++ b/Lib/test/test_json/test_threading.py @@ -0,0 +1,80 @@ +import unittest +from threading import Thread +from test.test_json import CTest +from test.support import threading_helper + + +def encode_json_helper(json, worker, data, number_of_threads, number_of_json_encodings=100): + worker_threads = [] + for index in range(number_of_threads): + worker_threads.append(Thread(target=worker, args=[data, index])) + for t in worker_threads: + t.start() + for ii in range(number_of_json_encodings): + json.dumps(data) + data.clear() + for t in worker_threads: + t.join() + + +class MyMapping(dict): + def __init__(self): + self.mapping = [] + + def items(self): + return self.mapping + + +@threading_helper.reap_threads +@threading_helper.requires_working_threading() +class TestJsonEncoding(CTest): + # Test encoding json with multiple threads modifying the data cannot + # corrupt the interpreter + + def test_json_mutating_list(self): + + def worker(data, index): + while data: + for d in data: + if len(d) > 5: + d.clear() + else: + d.append(index) + d.append(index) + d.append(index) + encode_json_helper(self.json, worker, [[], []], number_of_threads=16) + + def test_json_mutating_dict(self): + + def worker(data, index): + while data: + for d in data: + if len(d) > 5: + try: + d.pop(list(d)[0]) + except (KeyError, IndexError): + pass + else: + d[index] = index + encode_json_helper(self.json, worker, [{}, {}], number_of_threads=16) + + def test_json_mutating_mapping(self): + + def worker(data, index): + while data: + for d in data: + if len(d.mapping) > 3: + d.mapping.clear() + else: + d.mapping.append((index, index)) + encode_json_helper(self.json, + worker, [MyMapping(), MyMapping()], number_of_threads=16) + + +if __name__ == "__main__": + import time + + t0 = time.time() + unittest.main() + dt = time.time()-t0 + print(f'Done: {dt:.2f}') diff --git a/Misc/NEWS.d/next/Core and Builtins/2024-06-04-20-26-21.gh-issue-116738.q_hPYq.rst b/Misc/NEWS.d/next/Core and Builtins/2024-06-04-20-26-21.gh-issue-116738.q_hPYq.rst new file mode 100644 index 00000000000000..8ae38e757ff925 --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2024-06-04-20-26-21.gh-issue-116738.q_hPYq.rst @@ -0,0 +1 @@ +Make the module :mod:`json` safe to use under the free-theading build. diff --git a/Modules/_json.c b/Modules/_json.c index 9e29de0f22465f..49b688d28b5c41 100644 --- a/Modules/_json.c +++ b/Modules/_json.c @@ -10,6 +10,7 @@ #include "Python.h" #include "pycore_ceval.h" // _Py_EnterRecursiveCall() +#include "pycore_critical_section.h" // Py_BEGIN_CRITICAL_SECTION() #include "pycore_runtime.h" // _PyRuntime #include "pycore_pyerrors.h" // _PyErr_FormatNote @@ -1599,11 +1600,13 @@ encoder_listencode_dict(PyEncoderObject *s, _PyUnicodeWriter *writer, if (items == NULL || (s->sort_keys && PyList_Sort(items) < 0)) goto bail; + Py_BEGIN_CRITICAL_SECTION_SEQUENCE_FAST(items); for (Py_ssize_t i = 0; i < PyList_GET_SIZE(items); i++) { PyObject *item = PyList_GET_ITEM(items, i); if (!PyTuple_Check(item) || PyTuple_GET_SIZE(item) != 2) { PyErr_SetString(PyExc_ValueError, "items must return 2-tuples"); + Py_EXIT_CRITICAL_SECTION_SEQUENCE_FAST(); goto bail; } @@ -1611,19 +1614,26 @@ encoder_listencode_dict(PyEncoderObject *s, _PyUnicodeWriter *writer, value = PyTuple_GET_ITEM(item, 1); if (encoder_encode_key_value(s, writer, &first, dct, key, value, new_newline_indent, - current_item_separator) < 0) + current_item_separator) < 0) { + Py_EXIT_CRITICAL_SECTION_SEQUENCE_FAST(); goto bail; + } } + Py_END_CRITICAL_SECTION_SEQUENCE_FAST(); Py_CLEAR(items); } else { Py_ssize_t pos = 0; + Py_BEGIN_CRITICAL_SECTION(dct); while (PyDict_Next(dct, &pos, &key, &value)) { if (encoder_encode_key_value(s, writer, &first, dct, key, value, new_newline_indent, - current_item_separator) < 0) + current_item_separator) < 0) { + Py_EXIT_CRITICAL_SECTION(); goto bail; + } } + Py_END_CRITICAL_SECTION(); } if (ident != NULL) { @@ -1666,8 +1676,12 @@ encoder_listencode_list(PyEncoderObject *s, _PyUnicodeWriter *writer, s_fast = PySequence_Fast(seq, "_iterencode_list needs a sequence"); if (s_fast == NULL) return -1; + + Py_BEGIN_CRITICAL_SECTION_SEQUENCE_FAST(seq); + if (PySequence_Fast_GET_SIZE(s_fast) == 0) { Py_DECREF(s_fast); + Py_EXIT_CRITICAL_SECTION_SEQUENCE_FAST(); return _PyUnicodeWriter_WriteASCIIString(writer, "[]", 2); } @@ -1735,10 +1749,12 @@ encoder_listencode_list(PyEncoderObject *s, _PyUnicodeWriter *writer, if (_PyUnicodeWriter_WriteChar(writer, ']')) goto bail; Py_DECREF(s_fast); + Py_EXIT_CRITICAL_SECTION_SEQUENCE_FAST(); return 0; bail: Py_XDECREF(ident); + Py_END_CRITICAL_SECTION_SEQUENCE_FAST(); Py_DECREF(s_fast); Py_XDECREF(separator_indent); Py_XDECREF(new_newline_indent);