Skip to content

Commit

Permalink
GH-127058: Make PySequence_Tuple safer and probably faster. (#127758)
Browse files Browse the repository at this point in the history
* Use a small buffer, then list when constructing a tuple from an arbitrary sequence.
  • Loading branch information
markshannon authored Dec 11, 2024
1 parent 359389e commit 5a23994
Show file tree
Hide file tree
Showing 5 changed files with 88 additions and 48 deletions.
2 changes: 1 addition & 1 deletion Include/internal/pycore_list.h
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ typedef struct {
union _PyStackRef;

PyAPI_FUNC(PyObject *)_PyList_FromStackRefSteal(const union _PyStackRef *src, Py_ssize_t n);

PyAPI_FUNC(PyObject *)_PyList_AsTupleAndClear(PyListObject *v);

#ifdef __cplusplus
}
Expand Down
25 changes: 25 additions & 0 deletions Lib/test/test_capi/test_tuple.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import unittest
import sys
import gc
from collections import namedtuple
from test.support import import_helper

Expand Down Expand Up @@ -257,5 +258,29 @@ def test__tuple_resize(self):
self.assertRaises(SystemError, resize, [1, 2, 3], 0, False)
self.assertRaises(SystemError, resize, NULL, 0, False)

def test_bug_59313(self):
# Before 3.14, the C-API function PySequence_Tuple
# would create incomplete tuples which were visible to
# the cycle GC, and this test would crash the interpeter.
TAG = object()
tuples = []

def referrer_tuples():
return [x for x in gc.get_referrers(TAG)
if isinstance(x, tuple)]

def my_iter():
nonlocal tuples
yield TAG # 'tag' gets stored in the result tuple
tuples += referrer_tuples()
for x in range(10):
tuples += referrer_tuples()
# Prior to 3.13 would raise a SystemError when the tuple needs to be resized
yield x

self.assertEqual(tuple(my_iter()), (TAG, *range(10)))
self.assertEqual(tuples, [])


if __name__ == "__main__":
unittest.main()
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
``PySequence_Tuple`` now creates the resulting tuple atomically, preventing
partially created tuples being visible to the garbage collector or through
``gc.get_referrers()``
87 changes: 40 additions & 47 deletions Objects/abstract.c
Original file line number Diff line number Diff line change
Expand Up @@ -1993,9 +1993,6 @@ PyObject *
PySequence_Tuple(PyObject *v)
{
PyObject *it; /* iter(v) */
Py_ssize_t n; /* guess for result tuple size */
PyObject *result = NULL;
Py_ssize_t j;

if (v == NULL) {
return null_error();
Expand All @@ -2017,58 +2014,54 @@ PySequence_Tuple(PyObject *v)
if (it == NULL)
return NULL;

/* Guess result size and allocate space. */
n = PyObject_LengthHint(v, 10);
if (n == -1)
goto Fail;
result = PyTuple_New(n);
if (result == NULL)
goto Fail;

/* Fill the tuple. */
for (j = 0; ; ++j) {
Py_ssize_t n;
PyObject *buffer[8];
for (n = 0; n < 8; n++) {
PyObject *item = PyIter_Next(it);
if (item == NULL) {
if (PyErr_Occurred())
goto Fail;
break;
}
if (j >= n) {
size_t newn = (size_t)n;
/* The over-allocation strategy can grow a bit faster
than for lists because unlike lists the
over-allocation isn't permanent -- we reclaim
the excess before the end of this routine.
So, grow by ten and then add 25%.
*/
newn += 10u;
newn += newn >> 2;
if (newn > PY_SSIZE_T_MAX) {
/* Check for overflow */
PyErr_NoMemory();
Py_DECREF(item);
goto Fail;
if (PyErr_Occurred()) {
goto fail;
}
n = (Py_ssize_t)newn;
if (_PyTuple_Resize(&result, n) != 0) {
Py_DECREF(item);
goto Fail;
Py_DECREF(it);
return _PyTuple_FromArraySteal(buffer, n);
}
buffer[n] = item;
}
PyListObject *list = (PyListObject *)PyList_New(16);
if (list == NULL) {
goto fail;
}
assert(n == 8);
Py_SET_SIZE(list, n);
for (Py_ssize_t j = 0; j < n; j++) {
PyList_SET_ITEM(list, j, buffer[j]);
}
for (;;) {
PyObject *item = PyIter_Next(it);
if (item == NULL) {
if (PyErr_Occurred()) {
Py_DECREF(list);
Py_DECREF(it);
return NULL;
}
break;
}
if (_PyList_AppendTakeRef(list, item) < 0) {
Py_DECREF(list);
Py_DECREF(it);
return NULL;
}
PyTuple_SET_ITEM(result, j, item);
}

/* Cut tuple back if guess was too large. */
if (j < n &&
_PyTuple_Resize(&result, j) != 0)
goto Fail;

Py_DECREF(it);
return result;

Fail:
Py_XDECREF(result);
PyObject *res = _PyList_AsTupleAndClear(list);
Py_DECREF(list);
return res;
fail:
Py_DECREF(it);
while (n > 0) {
n--;
Py_DECREF(buffer[n]);
}
return NULL;
}

Expand Down
19 changes: 19 additions & 0 deletions Objects/listobject.c
Original file line number Diff line number Diff line change
Expand Up @@ -3174,6 +3174,25 @@ PyList_AsTuple(PyObject *v)
return ret;
}

PyObject *
_PyList_AsTupleAndClear(PyListObject *self)
{
assert(self != NULL);
PyObject *ret;
if (self->ob_item == NULL) {
return PyTuple_New(0);
}
Py_BEGIN_CRITICAL_SECTION(self);
PyObject **items = self->ob_item;
Py_ssize_t size = Py_SIZE(self);
self->ob_item = NULL;
Py_SET_SIZE(self, 0);
ret = _PyTuple_FromArraySteal(items, size);
free_list_items(items, false);
Py_END_CRITICAL_SECTION();
return ret;
}

PyObject *
_PyList_FromStackRefSteal(const _PyStackRef *src, Py_ssize_t n)
{
Expand Down

0 comments on commit 5a23994

Please sign in to comment.