Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[3.10] gh-101765: Fix SystemError / segmentation fault in iter __reduce__ when internal access of builtins.__dict__ exhausts the iterator (GH-101769) #102229

Merged
merged 2 commits into from
Feb 25, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
80 changes: 80 additions & 0 deletions Lib/test/test_iter.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,9 @@
from test.support import check_free_after_iterating, ALWAYS_EQ, NEVER_EQ
import pickle
import collections.abc
import functools
import contextlib
import builtins

# Test result of triple loop (too big to inline)
TRIPLETS = [(0, 0, 0), (0, 0, 1), (0, 0, 2),
Expand Down Expand Up @@ -81,6 +84,12 @@ class BadIterableClass:
def __iter__(self):
raise ZeroDivisionError

class EmptyIterClass:
def __len__(self):
return 0
def __getitem__(self, i):
raise StopIteration

# Main test suite

class TestCase(unittest.TestCase):
Expand Down Expand Up @@ -228,6 +237,77 @@ def test_mutating_seq_class_exhausted_iter(self):
self.assertEqual(list(empit), [5, 6])
self.assertEqual(list(a), [0, 1, 2, 3, 4, 5, 6])

def test_reduce_mutating_builtins_iter(self):
# This is a reproducer of issue #101765
# where iter `__reduce__` calls could lead to a segfault or SystemError
# depending on the order of C argument evaluation, which is undefined

# Backup builtins
builtins_dict = builtins.__dict__
orig = {"iter": iter, "reversed": reversed}

def run(builtin_name, item, sentinel=None):
it = iter(item) if sentinel is None else iter(item, sentinel)

class CustomStr:
def __init__(self, name, iterator):
self.name = name
self.iterator = iterator
def __hash__(self):
return hash(self.name)
def __eq__(self, other):
# Here we exhaust our iterator, possibly changing
# its `it_seq` pointer to NULL
# The `__reduce__` call should correctly get
# the pointers after this call
list(self.iterator)
return other == self.name

# del is required here
# to not prematurely call __eq__ from
# the hash collision with the old key
del builtins_dict[builtin_name]
builtins_dict[CustomStr(builtin_name, it)] = orig[builtin_name]

return it.__reduce__()

types = [
(EmptyIterClass(),),
(bytes(8),),
(bytearray(8),),
((1, 2, 3),),
(lambda: 0, 0),
]

try:
run_iter = functools.partial(run, "iter")
# The returned value of `__reduce__` should not only be valid
# but also *empty*, as `it` was exhausted during `__eq__`
# i.e "xyz" returns (iter, ("",))
self.assertEqual(run_iter("xyz"), (orig["iter"], ("",)))
self.assertEqual(run_iter([1, 2, 3]), (orig["iter"], ([],)))

# _PyEval_GetBuiltin is also called for `reversed` in a branch of
# listiter_reduce_general
self.assertEqual(
run("reversed", orig["reversed"](list(range(8)))),
(iter, ([],))
)

for case in types:
self.assertEqual(run_iter(*case), (orig["iter"], ((),)))
finally:
# Restore original builtins
for key, func in orig.items():
# need to suppress KeyErrors in case
# a failed test deletes the key without setting anything
with contextlib.suppress(KeyError):
# del is required here
# to not invoke our custom __eq__ from
# the hash collision with the old key
del builtins_dict[key]
builtins_dict[key] = func

# Test a new_style class with __iter__ but no next() method
def test_new_style_iter_class(self):
class IterClass(object):
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Fix SystemError / segmentation fault in iter ``__reduce__`` when internal access of ``builtins.__dict__`` keys mutates the iter object.
11 changes: 8 additions & 3 deletions Objects/bytearrayobject.c
Original file line number Diff line number Diff line change
Expand Up @@ -2442,11 +2442,16 @@ static PyObject *
bytearrayiter_reduce(bytesiterobject *it, PyObject *Py_UNUSED(ignored))
{
_Py_IDENTIFIER(iter);
PyObject *iter = _PyEval_GetBuiltinId(&PyId_iter);

/* _PyEval_GetBuiltinId can invoke arbitrary code,
* call must be before access of iterator pointers.
* see issue #101765 */

if (it->it_seq != NULL) {
return Py_BuildValue("N(O)n", _PyEval_GetBuiltinId(&PyId_iter),
it->it_seq, it->it_index);
return Py_BuildValue("N(O)n", iter, it->it_seq, it->it_index);
} else {
return Py_BuildValue("N(())", _PyEval_GetBuiltinId(&PyId_iter));
return Py_BuildValue("N(())", iter);
}
}

Expand Down
11 changes: 8 additions & 3 deletions Objects/bytesobject.c
Original file line number Diff line number Diff line change
Expand Up @@ -3146,11 +3146,16 @@ static PyObject *
striter_reduce(striterobject *it, PyObject *Py_UNUSED(ignored))
{
_Py_IDENTIFIER(iter);
PyObject *iter = _PyEval_GetBuiltinId(&PyId_iter);

/* _PyEval_GetBuiltinId can invoke arbitrary code,
* call must be before access of iterator pointers.
* see issue #101765 */

if (it->it_seq != NULL) {
return Py_BuildValue("N(O)n", _PyEval_GetBuiltinId(&PyId_iter),
it->it_seq, it->it_index);
return Py_BuildValue("N(O)n", iter, it->it_seq, it->it_index);
} else {
return Py_BuildValue("N(())", _PyEval_GetBuiltinId(&PyId_iter));
return Py_BuildValue("N(())", iter);
}
}

Expand Down
22 changes: 16 additions & 6 deletions Objects/iterobject.c
Original file line number Diff line number Diff line change
Expand Up @@ -104,11 +104,16 @@ PyDoc_STRVAR(length_hint_doc, "Private method returning an estimate of len(list(
static PyObject *
iter_reduce(seqiterobject *it, PyObject *Py_UNUSED(ignored))
{
PyObject *iter = _PyEval_GetBuiltinId(&PyId_iter);

/* _PyEval_GetBuiltinId can invoke arbitrary code,
* call must be before access of iterator pointers.
* see issue #101765 */

if (it->it_seq != NULL)
return Py_BuildValue("N(O)n", _PyEval_GetBuiltinId(&PyId_iter),
it->it_seq, it->it_index);
return Py_BuildValue("N(O)n", iter, it->it_seq, it->it_index);
else
return Py_BuildValue("N(())", _PyEval_GetBuiltinId(&PyId_iter));
return Py_BuildValue("N(())", iter);
}

PyDoc_STRVAR(reduce_doc, "Return state information for pickling.");
Expand Down Expand Up @@ -243,11 +248,16 @@ calliter_iternext(calliterobject *it)
static PyObject *
calliter_reduce(calliterobject *it, PyObject *Py_UNUSED(ignored))
{
PyObject *iter = _PyEval_GetBuiltinId(&PyId_iter);

/* _PyEval_GetBuiltinId can invoke arbitrary code,
* call must be before access of iterator pointers.
* see issue #101765 */

if (it->it_callable != NULL && it->it_sentinel != NULL)
return Py_BuildValue("N(OO)", _PyEval_GetBuiltinId(&PyId_iter),
it->it_callable, it->it_sentinel);
return Py_BuildValue("N(OO)", iter, it->it_callable, it->it_sentinel);
else
return Py_BuildValue("N(())", _PyEval_GetBuiltinId(&PyId_iter));
return Py_BuildValue("N(())", iter);
}

static PyMethodDef calliter_methods[] = {
Expand Down
18 changes: 12 additions & 6 deletions Objects/listobject.c
Original file line number Diff line number Diff line change
Expand Up @@ -3405,17 +3405,23 @@ listiter_reduce_general(void *_it, int forward)
_Py_IDENTIFIER(reversed);
PyObject *list;

/* _PyEval_GetBuiltinId can invoke arbitrary code,
* call must be before access of iterator pointers.
* see issue #101765 */

/* the objects are not the same, index is of different types! */
if (forward) {
PyObject *iter = _PyEval_GetBuiltinId(&PyId_iter);
listiterobject *it = (listiterobject *)_it;
if (it->it_seq)
return Py_BuildValue("N(O)n", _PyEval_GetBuiltinId(&PyId_iter),
it->it_seq, it->it_index);
if (it->it_seq) {
return Py_BuildValue("N(O)n", iter, it->it_seq, it->it_index);
}
} else {
PyObject *reversed = _PyEval_GetBuiltinId(&PyId_reversed);
listreviterobject *it = (listreviterobject *)_it;
if (it->it_seq)
return Py_BuildValue("N(O)n", _PyEval_GetBuiltinId(&PyId_reversed),
it->it_seq, it->it_index);
if (it->it_seq) {
return Py_BuildValue("N(O)n", reversed, it->it_seq, it->it_index);
}
}
/* empty iterator, create an empty list */
list = PyList_New(0);
Expand Down
11 changes: 8 additions & 3 deletions Objects/tupleobject.c
Original file line number Diff line number Diff line change
Expand Up @@ -1115,11 +1115,16 @@ static PyObject *
tupleiter_reduce(tupleiterobject *it, PyObject *Py_UNUSED(ignored))
{
_Py_IDENTIFIER(iter);
PyObject *iter = _PyEval_GetBuiltinId(&PyId_iter);

/* _PyEval_GetBuiltinId can invoke arbitrary code,
* call must be before access of iterator pointers.
* see issue #101765 */

if (it->it_seq)
return Py_BuildValue("N(O)n", _PyEval_GetBuiltinId(&PyId_iter),
it->it_seq, it->it_index);
return Py_BuildValue("N(O)n", iter, it->it_seq, it->it_index);
else
return Py_BuildValue("N(())", _PyEval_GetBuiltinId(&PyId_iter));
return Py_BuildValue("N(())", iter);
}

static PyObject *
Expand Down
11 changes: 8 additions & 3 deletions Objects/unicodeobject.c
Original file line number Diff line number Diff line change
Expand Up @@ -16070,14 +16070,19 @@ static PyObject *
unicodeiter_reduce(unicodeiterobject *it, PyObject *Py_UNUSED(ignored))
{
_Py_IDENTIFIER(iter);
PyObject *iter = _PyEval_GetBuiltinId(&PyId_iter);

/* _PyEval_GetBuiltinId can invoke arbitrary code,
* call must be before access of iterator pointers.
* see issue #101765 */

if (it->it_seq != NULL) {
return Py_BuildValue("N(O)n", _PyEval_GetBuiltinId(&PyId_iter),
it->it_seq, it->it_index);
return Py_BuildValue("N(O)n", iter, it->it_seq, it->it_index);
} else {
PyObject *u = (PyObject *)_PyUnicode_New(0);
if (u == NULL)
return NULL;
return Py_BuildValue("N(N)", _PyEval_GetBuiltinId(&PyId_iter), u);
return Py_BuildValue("N(N)", iter, u);
}
}

Expand Down