Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

gh-101765: Fix SystemError / segmentation fault in iter __reduce__ when internal access of builtins.__dict__ exhausts the iterator #101769

Merged
merged 24 commits into from
Feb 24, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
f256afe
Fix undefined behavior in listiter_reduce from _PyEval_GetBuiltin sid…
ionite34 Feb 10, 2023
6b4faad
Update comment to not mention `__eq__`
ionite34 Feb 10, 2023
a6d6211
Fix undefined behavior in iter_reduce and calliter_reduce
ionite34 Feb 10, 2023
4ccf427
Update listiter_reduce_general comment
ionite34 Feb 10, 2023
c2c9cfb
Fix undefined behavior in bytearrayiter_reduce from _PyEval_GetBuilti…
ionite34 Feb 10, 2023
e2989d9
Fix undefined behavior in striter_reduce from _PyEval_GetBuiltin side…
ionite34 Feb 10, 2023
71960a8
Fix undefined behavior in tupleiter_reduce from _PyEval_GetBuiltin si…
ionite34 Feb 10, 2023
efa0540
Move iter call in unicodeiter_reduce before `it` pointer access due t…
ionite34 Feb 10, 2023
c5abb14
Add iter reduce tests for issue #101765
ionite34 Feb 10, 2023
45522c6
Remove C++ reference in comments
ionite34 Feb 10, 2023
4f5fc19
Remove C++ reference in comments
ionite34 Feb 10, 2023
049a8dd
Move builtin declarations inside if blocks
ionite34 Feb 10, 2023
ef4f955
Move _PyEval_GetBuiltin before gi checks, add gi NULL check in ga_ite…
ionite34 Feb 10, 2023
7d4afb0
Update iter reduce mutating tests for generic alias
ionite34 Feb 10, 2023
8e4418d
📜🤖 Added by blurb_it.
blurb-it[bot] Feb 10, 2023
d8ced8e
Fix backticks format for news
ionite34 Feb 10, 2023
178b8ea
Refactor iter reduce builtins mutation tests
ionite34 Feb 10, 2023
49ba8c3
Update iter mutating builtins test to include reversed iterator for l…
ionite34 Feb 10, 2023
93854e1
Add comment in unicodeiter_reduce for moving iter call before it poin…
ionite34 Feb 10, 2023
98ec3c6
Change test `__builtins__` to builtins import
ionite34 Feb 10, 2023
e661495
Change NEWS blurb phrasing
ionite34 Feb 10, 2023
19ab9c6
Update iter reduce mutating builtins test comments and simplify logic
ionite34 Feb 10, 2023
9b664c2
Update comments to better reflect issue
ionite34 Feb 24, 2023
c67b11a
Clarify test comments
ionite34 Feb 24, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
81 changes: 81 additions & 0 deletions Lib/test/test_iter.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,9 @@
from test.support import check_free_after_iterating, ALWAYS_EQ, NEVER_EQ
import pickle
import collections.abc
import functools
import contextlib
import builtins

# Test result of triple loop (too big to inline)
TRIPLETS = [(0, 0, 0), (0, 0, 1), (0, 0, 2),
Expand Down Expand Up @@ -91,6 +94,12 @@ def __call__(self):
raise IndexError # Emergency stop
return i

class EmptyIterClass:
def __len__(self):
return 0
def __getitem__(self, i):
raise StopIteration

# Main test suite

class TestCase(unittest.TestCase):
Expand Down Expand Up @@ -238,6 +247,78 @@ def test_mutating_seq_class_exhausted_iter(self):
self.assertEqual(list(empit), [5, 6])
self.assertEqual(list(a), [0, 1, 2, 3, 4, 5, 6])

def test_reduce_mutating_builtins_iter(self):
# This is a reproducer of issue #101765
# where iter `__reduce__` calls could lead to a segfault or SystemError
# depending on the order of C argument evaluation, which is undefined

# Backup builtins
builtins_dict = builtins.__dict__
orig = {"iter": iter, "reversed": reversed}

def run(builtin_name, item, sentinel=None):
it = iter(item) if sentinel is None else iter(item, sentinel)

class CustomStr:
def __init__(self, name, iterator):
self.name = name
self.iterator = iterator
def __hash__(self):
return hash(self.name)
def __eq__(self, other):
# Here we exhaust our iterator, possibly changing
# its `it_seq` pointer to NULL
# The `__reduce__` call should correctly get
# the pointers after this call
list(self.iterator)
return other == self.name

# del is required here
# to not prematurely call __eq__ from
# the hash collision with the old key
del builtins_dict[builtin_name]
builtins_dict[CustomStr(builtin_name, it)] = orig[builtin_name]

return it.__reduce__()

types = [
(EmptyIterClass(),),
(bytes(8),),
(bytearray(8),),
((1, 2, 3),),
(lambda: 0, 0),
(tuple[int],) # GenericAlias
]

try:
run_iter = functools.partial(run, "iter")
# The returned value of `__reduce__` should not only be valid
# but also *empty*, as `it` was exhausted during `__eq__`
# i.e "xyz" returns (iter, ("",))
self.assertEqual(run_iter("xyz"), (orig["iter"], ("",)))
self.assertEqual(run_iter([1, 2, 3]), (orig["iter"], ([],)))

# _PyEval_GetBuiltin is also called for `reversed` in a branch of
# listiter_reduce_general
self.assertEqual(
run("reversed", orig["reversed"](list(range(8)))),
(iter, ([],))
)

for case in types:
self.assertEqual(run_iter(*case), (orig["iter"], ((),)))
finally:
# Restore original builtins
for key, func in orig.items():
# need to suppress KeyErrors in case
# a failed test deletes the key without setting anything
with contextlib.suppress(KeyError):
# del is required here
# to not invoke our custom __eq__ from
# the hash collision with the old key
del builtins_dict[key]
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why do we need to del first? We set the correct key on the next line anyway.

Copy link
Contributor Author

@ionite34 ionite34 Feb 24, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's to not invoke the __eq__ from our test CustomStr, since the string key will collide with the same hash. I added some more comments regarding this in c67b11a

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah, right! This thing is breaking my assumptions about how dicts should behave :)

builtins_dict[key] = func

# Test a new_style class with __iter__ but no next() method
def test_new_style_iter_class(self):
class IterClass(object):
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Fix SystemError / segmentation fault in iter ``__reduce__`` when internal access of ``builtins.__dict__`` keys mutates the iter object.
11 changes: 8 additions & 3 deletions Objects/bytearrayobject.c
Original file line number Diff line number Diff line change
Expand Up @@ -2391,11 +2391,16 @@ PyDoc_STRVAR(length_hint_doc,
static PyObject *
bytearrayiter_reduce(bytesiterobject *it, PyObject *Py_UNUSED(ignored))
{
PyObject *iter = _PyEval_GetBuiltin(&_Py_ID(iter));

/* _PyEval_GetBuiltin can invoke arbitrary code,
* call must be before access of iterator pointers.
* see issue #101765 */

if (it->it_seq != NULL) {
return Py_BuildValue("N(O)n", _PyEval_GetBuiltin(&_Py_ID(iter)),
it->it_seq, it->it_index);
return Py_BuildValue("N(O)n", iter, it->it_seq, it->it_index);
} else {
return Py_BuildValue("N(())", _PyEval_GetBuiltin(&_Py_ID(iter)));
return Py_BuildValue("N(())", iter);
}
}

Expand Down
11 changes: 8 additions & 3 deletions Objects/bytesobject.c
Original file line number Diff line number Diff line change
Expand Up @@ -3169,11 +3169,16 @@ PyDoc_STRVAR(length_hint_doc,
static PyObject *
striter_reduce(striterobject *it, PyObject *Py_UNUSED(ignored))
{
PyObject *iter = _PyEval_GetBuiltin(&_Py_ID(iter));

/* _PyEval_GetBuiltin can invoke arbitrary code,
* call must be before access of iterator pointers.
* see issue #101765 */

if (it->it_seq != NULL) {
return Py_BuildValue("N(O)n", _PyEval_GetBuiltin(&_Py_ID(iter)),
it->it_seq, it->it_index);
return Py_BuildValue("N(O)n", iter, it->it_seq, it->it_index);
} else {
return Py_BuildValue("N(())", _PyEval_GetBuiltin(&_Py_ID(iter)));
return Py_BuildValue("N(())", iter);
}
}

Expand Down
11 changes: 10 additions & 1 deletion Objects/genericaliasobject.c
Original file line number Diff line number Diff line change
Expand Up @@ -877,8 +877,17 @@ ga_iter_clear(PyObject *self) {
static PyObject *
ga_iter_reduce(PyObject *self, PyObject *Py_UNUSED(ignored))
{
PyObject *iter = _PyEval_GetBuiltin(&_Py_ID(iter));
gaiterobject *gi = (gaiterobject *)self;
return Py_BuildValue("N(O)", _PyEval_GetBuiltin(&_Py_ID(iter)), gi->obj);

/* _PyEval_GetBuiltin can invoke arbitrary code,
* call must be before access of iterator pointers.
* see issue #101765 */

if (gi->obj)
return Py_BuildValue("N(O)", iter, gi->obj);
else
return Py_BuildValue("N(())", iter);
}

static PyMethodDef ga_iter_methods[] = {
Expand Down
22 changes: 16 additions & 6 deletions Objects/iterobject.c
Original file line number Diff line number Diff line change
Expand Up @@ -102,11 +102,16 @@ PyDoc_STRVAR(length_hint_doc, "Private method returning an estimate of len(list(
static PyObject *
iter_reduce(seqiterobject *it, PyObject *Py_UNUSED(ignored))
{
PyObject *iter = _PyEval_GetBuiltin(&_Py_ID(iter));

/* _PyEval_GetBuiltin can invoke arbitrary code,
* call must be before access of iterator pointers.
* see issue #101765 */

if (it->it_seq != NULL)
return Py_BuildValue("N(O)n", _PyEval_GetBuiltin(&_Py_ID(iter)),
it->it_seq, it->it_index);
return Py_BuildValue("N(O)n", iter, it->it_seq, it->it_index);
else
return Py_BuildValue("N(())", _PyEval_GetBuiltin(&_Py_ID(iter)));
return Py_BuildValue("N(())", iter);
}

PyDoc_STRVAR(reduce_doc, "Return state information for pickling.");
Expand Down Expand Up @@ -239,11 +244,16 @@ calliter_iternext(calliterobject *it)
static PyObject *
calliter_reduce(calliterobject *it, PyObject *Py_UNUSED(ignored))
{
PyObject *iter = _PyEval_GetBuiltin(&_Py_ID(iter));

/* _PyEval_GetBuiltin can invoke arbitrary code,
* call must be before access of iterator pointers.
* see issue #101765 */

if (it->it_callable != NULL && it->it_sentinel != NULL)
return Py_BuildValue("N(OO)", _PyEval_GetBuiltin(&_Py_ID(iter)),
it->it_callable, it->it_sentinel);
return Py_BuildValue("N(OO)", iter, it->it_callable, it->it_sentinel);
else
return Py_BuildValue("N(())", _PyEval_GetBuiltin(&_Py_ID(iter)));
return Py_BuildValue("N(())", iter);
}

static PyMethodDef calliter_methods[] = {
Expand Down
12 changes: 8 additions & 4 deletions Objects/listobject.c
Original file line number Diff line number Diff line change
Expand Up @@ -3444,18 +3444,22 @@ listiter_reduce_general(void *_it, int forward)
{
PyObject *list;

/* _PyEval_GetBuiltin can invoke arbitrary code,
* call must be before access of iterator pointers.
* see issue #101765 */

/* the objects are not the same, index is of different types! */
if (forward) {
PyObject *iter = _PyEval_GetBuiltin(&_Py_ID(iter));
_PyListIterObject *it = (_PyListIterObject *)_it;
if (it->it_seq) {
return Py_BuildValue("N(O)n", _PyEval_GetBuiltin(&_Py_ID(iter)),
it->it_seq, it->it_index);
return Py_BuildValue("N(O)n", iter, it->it_seq, it->it_index);
}
} else {
PyObject *reversed = _PyEval_GetBuiltin(&_Py_ID(reversed));
listreviterobject *it = (listreviterobject *)_it;
if (it->it_seq) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If it_seq is NULL then reversed is leaked here, _PyEval_GetBuiltin returns a strong reference.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good point, but doesn't that mean the old code also leaked references? I'll prepare a PR to fix this now.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oh wait, the N code to Py_BuildValue steals a reference, so the previous code was right in terms of refcounting.

return Py_BuildValue("N(O)n", _PyEval_GetBuiltin(&_Py_ID(reversed)),
it->it_seq, it->it_index);
return Py_BuildValue("N(O)n", reversed, it->it_seq, it->it_index);
}
}
/* empty iterator, create an empty list */
Expand Down
11 changes: 8 additions & 3 deletions Objects/tupleobject.c
Original file line number Diff line number Diff line change
Expand Up @@ -1048,11 +1048,16 @@ PyDoc_STRVAR(length_hint_doc, "Private method returning an estimate of len(list(
static PyObject *
tupleiter_reduce(_PyTupleIterObject *it, PyObject *Py_UNUSED(ignored))
{
PyObject *iter = _PyEval_GetBuiltin(&_Py_ID(iter));

/* _PyEval_GetBuiltin can invoke arbitrary code,
* call must be before access of iterator pointers.
* see issue #101765 */

if (it->it_seq)
return Py_BuildValue("N(O)n", _PyEval_GetBuiltin(&_Py_ID(iter)),
it->it_seq, it->it_index);
return Py_BuildValue("N(O)n", iter, it->it_seq, it->it_index);
else
return Py_BuildValue("N(())", _PyEval_GetBuiltin(&_Py_ID(iter)));
return Py_BuildValue("N(())", iter);
}

static PyObject *
Expand Down
11 changes: 8 additions & 3 deletions Objects/unicodeobject.c
Original file line number Diff line number Diff line change
Expand Up @@ -14784,14 +14784,19 @@ PyDoc_STRVAR(length_hint_doc, "Private method returning an estimate of len(list(
static PyObject *
unicodeiter_reduce(unicodeiterobject *it, PyObject *Py_UNUSED(ignored))
{
PyObject *iter = _PyEval_GetBuiltin(&_Py_ID(iter));

/* _PyEval_GetBuiltin can invoke arbitrary code,
* call must be before access of iterator pointers.
* see issue #101765 */

if (it->it_seq != NULL) {
return Py_BuildValue("N(O)n", _PyEval_GetBuiltin(&_Py_ID(iter)),
it->it_seq, it->it_index);
return Py_BuildValue("N(O)n", iter, it->it_seq, it->it_index);
} else {
PyObject *u = unicode_new_empty();
if (u == NULL)
return NULL;
return Py_BuildValue("N(N)", _PyEval_GetBuiltin(&_Py_ID(iter)), u);
return Py_BuildValue("N(N)", iter, u);
}
}

Expand Down