Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

gh-116738: Make _json module safe in the free-threading build #119438

Open
wants to merge 27 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
27 commits
Select commit Hold shift + click to select a range
da0e917
Make the _json module thread safe
eendebakpt May 22, 2024
3797dfa
Update Modules/_json.c
eendebakpt May 22, 2024
366654c
handle goto and return statements
eendebakpt May 24, 2024
5b72cdf
Apply suggestions from code review
eendebakpt May 25, 2024
c4c24c3
Update Include/internal/pycore_critical_section.h
eendebakpt May 25, 2024
370191b
rename macro
eendebakpt May 31, 2024
93c4466
Merge branch 'main' into json_ft
eendebakpt May 31, 2024
eafd3c1
fix typo
eendebakpt May 31, 2024
daeec46
Merge branch 'json_ft' of github.com:eendebakpt/cpython into json_ft
eendebakpt May 31, 2024
d54baf2
fix missing exit from critical section
eendebakpt Jun 4, 2024
e5fa305
revert changes to tests
eendebakpt Jun 4, 2024
d4ddf5d
📜🤖 Added by blurb_it.
blurb-it[bot] Jun 4, 2024
67d942f
Merge branch 'main' into json_ft
eendebakpt Jun 4, 2024
4ffc1b2
Merge branch 'main' into json_ft
eendebakpt Aug 14, 2024
384ca59
sync with main
eendebakpt Aug 14, 2024
64e20aa
sync with main
eendebakpt Aug 14, 2024
e6ce9c9
update news entry
eendebakpt Aug 14, 2024
34885a0
fix normal build
eendebakpt Aug 14, 2024
2fe760b
Merge branch 'main' into json_ft
eendebakpt Aug 14, 2024
eebccac
add lock around result of PyMapping_Items
eendebakpt Aug 15, 2024
db8947c
add tests
eendebakpt Aug 15, 2024
c19ad14
fix argument of Py_END_CRITICAL_SECTION_SEQUENCE_FAST
eendebakpt Aug 15, 2024
8b12e0f
Merge branch 'main' into json_ft
eendebakpt Feb 10, 2025
78d3595
avoid Py_EXIT_CRITICAL_SECTION_SEQUENCE_FAST
eendebakpt Feb 10, 2025
6e8615f
use barriers in test
eendebakpt Feb 10, 2025
39ebc00
typo
eendebakpt Feb 10, 2025
7c5b185
whitespace
eendebakpt Feb 10, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
84 changes: 84 additions & 0 deletions Lib/test/test_free_threading/test_json.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
import unittest
from threading import Barrier, Thread
from test.test_json import CTest
from test.support import threading_helper


def encode_json_helper(json, worker, data, number_of_threads, number_of_json_encodings=100):
    """Race `worker` threads mutating `data` against repeated JSON encoding.

    Starts `number_of_threads` threads running `worker(barrier, data, index)`,
    JSON-encodes `data` `number_of_json_encodings` times on the calling
    thread, then clears `data` (workers use this as their stop signal) and
    joins all threads.
    """
    barrier = Barrier(number_of_threads)
    threads = [Thread(target=worker, args=[barrier, data, index])
               for index in range(number_of_threads)]
    for thread in threads:
        thread.start()
    for _ in range(number_of_json_encodings):
        json.dumps(data)
    # Emptying the data is the workers' exit condition (`while data:`).
    data.clear()
    for thread in threads:
        thread.join()


class MyMapping(dict):
    """A dict subclass whose items() is backed by a plain list.

    Used to exercise the encoder's mapping path while another thread
    mutates the list returned by items().
    """

    def __init__(self):
        # The underlying dict is deliberately left empty; the JSON
        # encoder only consumes items().
        self.mapping = list()

    def items(self):
        # Return the live list (not a copy) so other threads can mutate
        # it while the encoder iterates over it.
        return self.mapping


@threading_helper.reap_threads
@threading_helper.requires_working_threading()
class TestJsonEncoding(CTest):
    # Stress tests: JSON-encode containers while other threads mutate
    # them concurrently.  The assertion is implicit — no particular
    # output is expected, only that the interpreter does not crash or
    # corrupt memory (relevant for the free-threading build).

    def test_json_mutating_list(self):

        def worker(barrier, data, index):
            # Synchronize the start so all workers mutate at once.
            barrier.wait()
            # Stop once the main thread clears `data` (see
            # encode_json_helper).
            while data:
                for d in data:
                    if len(d) > 5:
                        d.clear()
                    else:
                        # Three appends per pass so the list both grows
                        # and gets cleared frequently during encoding.
                        d.append(index)
                        d.append(index)
                        d.append(index)
        encode_json_helper(self.json, worker, [[], []], number_of_threads=16)

    def test_json_mutating_dict(self):

        def worker(barrier, data, index):
            barrier.wait()
            while data:
                for d in data:
                    if len(d) > 5:
                        try:
                            d.pop(list(d)[0])
                        except (KeyError, IndexError):
                            # Another worker may have removed the key (or
                            # emptied the dict) between list(d) and pop().
                            pass
                    else:
                        d[index] = index
        encode_json_helper(self.json, worker, [{}, {}], number_of_threads=16)

    def test_json_mutating_mapping(self):

        def worker(barrier, data, index):
            barrier.wait()
            while data:
                for d in data:
                    # Mutate the list backing MyMapping.items() while the
                    # encoder iterates over it.
                    if len(d.mapping) > 3:
                        d.mapping.clear()
                    else:
                        d.mapping.append((index, index))
        encode_json_helper(self.json,
                           worker, [MyMapping(), MyMapping()], number_of_threads=16)


if __name__ == "__main__":
    import time

    # unittest.main() calls sys.exit() by default, which would make the
    # timing report below unreachable; exit=False lets the script
    # continue after the test run.  perf_counter() is the appropriate
    # monotonic clock for measuring elapsed time.
    t0 = time.perf_counter()
    unittest.main(exit=False)
    dt = time.perf_counter() - t0
    print(f'Done: {dt:.2f}')
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Make the :mod:`json` module safe to use under the free-threading build.
130 changes: 93 additions & 37 deletions Modules/_json.c
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@

#include "Python.h"
#include "pycore_ceval.h" // _Py_EnterRecursiveCall()
#include "pycore_critical_section.h" // Py_BEGIN_CRITICAL_SECTION()
#include "pycore_runtime.h" // _PyRuntime
#include "pycore_pyerrors.h" // _PyErr_FormatNote

Expand Down Expand Up @@ -1633,6 +1634,52 @@ encoder_encode_key_value(PyEncoderObject *s, PyUnicodeWriter *writer, bool *firs
return 0;
}

/* Encode every (key, value) pair of `items` — a list produced by
 * PyMapping_Items(dct) — into `writer`.
 *
 * The caller must hold the critical section protecting `items`.
 * Returns 0 on success, -1 with an exception set on failure.
 */
static inline int
_encoder_iterate_mapping_lock_held(PyEncoderObject *s, PyUnicodeWriter *writer,
                                   PyObject *dct, PyObject *items,
                                   Py_ssize_t indent_level, PyObject *indent_cache,
                                   PyObject *separator)
{
    bool first = true;
    /* The list size is re-read on every iteration on purpose: encoding a
     * value can run arbitrary Python code that mutates `items`. */
    for (Py_ssize_t idx = 0; idx < PyList_GET_SIZE(items); idx++) {
        PyObject *pair = PyList_GET_ITEM(items, idx);

        if (!PyTuple_Check(pair) || PyTuple_GET_SIZE(pair) != 2) {
            PyErr_SetString(PyExc_ValueError, "items must return 2-tuples");
            return -1;
        }

        PyObject *key = PyTuple_GET_ITEM(pair, 0);
        PyObject *value = PyTuple_GET_ITEM(pair, 1);
        if (encoder_encode_key_value(s, writer, &first, dct, key, value,
                                     indent_level, indent_cache,
                                     separator) < 0) {
            return -1;
        }
    }

    return 0;
}

/* Encode every (key, value) pair of the exact dict `dct` into `writer`.
 *
 * The caller must hold the critical section of `dct` so that
 * PyDict_Next() iterates a consistent view in the free-threading build.
 * Returns 0 on success, -1 with an exception set on failure.
 */
static inline int
_encoder_iterate_dict_lock_held(PyEncoderObject *s, PyUnicodeWriter *writer,
                                PyObject *dct, Py_ssize_t indent_level,
                                PyObject *indent_cache, PyObject *separator)
{
    Py_ssize_t pos = 0;
    PyObject *key = NULL;
    PyObject *value = NULL;
    bool first = true;

    while (PyDict_Next(dct, &pos, &key, &value)) {
        int rc = encoder_encode_key_value(s, writer, &first, dct, key, value,
                                          indent_level, indent_cache,
                                          separator);
        if (rc < 0) {
            return -1;
        }
    }
    return 0;
}

static int
encoder_listencode_dict(PyEncoderObject *s, PyUnicodeWriter *writer,
PyObject *dct,
Expand All @@ -1641,8 +1688,6 @@ encoder_listencode_dict(PyEncoderObject *s, PyUnicodeWriter *writer,
/* Encode Python dict dct a JSON term */
PyObject *ident = NULL;
PyObject *items = NULL;
PyObject *key, *value;
bool first = true;

if (PyDict_GET_SIZE(dct) == 0) {
/* Fast path */
Expand Down Expand Up @@ -1682,33 +1727,29 @@ encoder_listencode_dict(PyEncoderObject *s, PyUnicodeWriter *writer,

if (s->sort_keys || !PyDict_CheckExact(dct)) {
items = PyMapping_Items(dct);
if (items == NULL || (s->sort_keys && PyList_Sort(items) < 0))
if (items == NULL || (s->sort_keys && PyList_Sort(items) < 0)) {
goto bail;

for (Py_ssize_t i = 0; i < PyList_GET_SIZE(items); i++) {
PyObject *item = PyList_GET_ITEM(items, i);

if (!PyTuple_Check(item) || PyTuple_GET_SIZE(item) != 2) {
PyErr_SetString(PyExc_ValueError, "items must return 2-tuples");
goto bail;
}

key = PyTuple_GET_ITEM(item, 0);
value = PyTuple_GET_ITEM(item, 1);
if (encoder_encode_key_value(s, writer, &first, dct, key, value,
indent_level, indent_cache,
separator) < 0)
goto bail;
}

int result;
Py_BEGIN_CRITICAL_SECTION_SEQUENCE_FAST(items);
result = _encoder_iterate_mapping_lock_held(s, writer, dct,
items, indent_level, indent_cache, separator);
Py_END_CRITICAL_SECTION_SEQUENCE_FAST();
Py_CLEAR(items);
if (result < 0) {
Py_XDECREF(items);
goto bail;
}

} else {
Py_ssize_t pos = 0;
while (PyDict_Next(dct, &pos, &key, &value)) {
if (encoder_encode_key_value(s, writer, &first, dct, key, value,
indent_level, indent_cache,
separator) < 0)
goto bail;
int result;
Py_BEGIN_CRITICAL_SECTION(dct);
result = _encoder_iterate_dict_lock_held(s, writer, dct,
indent_level, indent_cache, separator);
Py_END_CRITICAL_SECTION();
if (result < 0) {
goto bail;
}
}

Expand All @@ -1735,17 +1776,35 @@ encoder_listencode_dict(PyEncoderObject *s, PyUnicodeWriter *writer,
return -1;
}

/* Encode every element of `s_fast` (the PySequence_Fast() form of `seq`)
 * into `writer`, writing `separator` between consecutive elements.
 *
 * The caller must hold the critical section protecting `s_fast`.
 * Returns 0 on success, -1 with an exception set on failure.
 */
static inline int
_encoder_iterate_fast_seq_lock_held(PyEncoderObject *s, PyUnicodeWriter *writer,
                                    PyObject *seq, PyObject *s_fast,
                                    Py_ssize_t indent_level, PyObject *indent_cache, PyObject *separator)
{
    /* The size is re-read every iteration on purpose: encoding an
     * element can run arbitrary Python code that mutates the sequence. */
    for (Py_ssize_t idx = 0; idx < PySequence_Fast_GET_SIZE(s_fast); idx++) {
        /* A separator precedes every element except the first. */
        if (idx > 0 && PyUnicodeWriter_WriteStr(writer, separator) < 0) {
            return -1;
        }
        PyObject *elem = PySequence_Fast_GET_ITEM(s_fast, idx);
        if (encoder_listencode_obj(s, writer, elem, indent_level, indent_cache)) {
            _PyErr_FormatNote("when serializing %T item %zd", seq, idx);
            return -1;
        }
    }
    return 0;
}

static int
encoder_listencode_list(PyEncoderObject *s, PyUnicodeWriter *writer,
PyObject *seq,
Py_ssize_t indent_level, PyObject *indent_cache)
{
PyObject *ident = NULL;
PyObject *s_fast = NULL;
Py_ssize_t i;

ident = NULL;
s_fast = PySequence_Fast(seq, "_iterencode_list needs a sequence");
s_fast = PySequence_Fast(seq, "encoder_listencode_list needs a sequence");
if (s_fast == NULL)
return -1;
if (PySequence_Fast_GET_SIZE(s_fast) == 0) {
Expand Down Expand Up @@ -1783,16 +1842,13 @@ encoder_listencode_list(PyEncoderObject *s, PyUnicodeWriter *writer,
goto bail;
}
}
for (i = 0; i < PySequence_Fast_GET_SIZE(s_fast); i++) {
PyObject *obj = PySequence_Fast_GET_ITEM(s_fast, i);
if (i) {
if (PyUnicodeWriter_WriteStr(writer, separator) < 0)
goto bail;
}
if (encoder_listencode_obj(s, writer, obj, indent_level, indent_cache)) {
_PyErr_FormatNote("when serializing %T item %zd", seq, i);
goto bail;
}
int result;
Py_BEGIN_CRITICAL_SECTION_SEQUENCE_FAST(seq);
result = _encoder_iterate_fast_seq_lock_held(s, writer, seq, s_fast,
indent_level, indent_cache, separator);
Py_END_CRITICAL_SECTION_SEQUENCE_FAST();
if (result < 0) {
goto bail;
}
if (ident != NULL) {
if (PyDict_DelItem(s->markers, ident))
Expand Down
Loading