Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

GH-95150: Use position and exception tables for code hashing and equality #95509

Merged
merged 5 commits into from
Aug 1, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 21 additions & 0 deletions Lib/test/test_code.py
Original file line number Diff line number Diff line change
Expand Up @@ -428,6 +428,27 @@ def func():
self.assertIsNone(line)
self.assertEqual(end_line, new_code.co_firstlineno + 1)

def test_code_equality(self):
    # Code objects that differ only in their line table and/or their
    # exception table must not compare equal.

    # f() is never called: only its code object is inspected, so the
    # undefined names a, b, c and d are harmless.  The bare "except:" is
    # deliberate; presumably the try/except/else/finally statement is here
    # so that the code object carries a non-trivial exception table.
    def f():
        try:
            a()
        except:
            b()
        else:
            c()
        finally:
            d()

    code_a = f.__code__                             # tables as compiled
    code_b = code_a.replace(co_linetable=b"")       # line table emptied
    code_c = code_a.replace(co_exceptiontable=b"")  # exception table emptied
    code_d = code_b.replace(co_exceptiontable=b"")  # both tables emptied

    # Every one of the six possible pairs must be unequal.
    self.assertNotEqual(code_a, code_b)
    self.assertNotEqual(code_a, code_c)
    self.assertNotEqual(code_a, code_d)
    self.assertNotEqual(code_b, code_c)
    self.assertNotEqual(code_b, code_d)
    self.assertNotEqual(code_c, code_d)


def isinterned(s):
    """Return True if *s* is the object sys.intern() yields for its value.

    An equal-valued copy of *s* is built by padding it with underscores and
    slicing them off again.  That copy is passed to sys.intern(); the result
    is identical to *s* only when *s* itself is the interned string.
    """
    # Intended to be a distinct object from `s` with the same characters, so
    # that intern() cannot trivially hand `s` back.
    equal_copy = ('_' + s + '_')[1:-1]
    return s is sys.intern(equal_copy)
Expand Down
27 changes: 24 additions & 3 deletions Lib/test/test_compile.py
Original file line number Diff line number Diff line change
Expand Up @@ -613,7 +613,7 @@ def check_same_constant(const):
exec(code, ns)
f1 = ns['f1']
f2 = ns['f2']
self.assertIs(f1.__code__, f2.__code__)
self.assertIs(f1.__code__.co_consts, f2.__code__.co_consts)
self.check_constant(f1, const)
self.assertEqual(repr(f1()), repr(const))

Expand All @@ -626,7 +626,7 @@ def check_same_constant(const):
# Note: "lambda: ..." emits "LOAD_CONST Ellipsis",
# whereas "lambda: Ellipsis" emits "LOAD_GLOBAL Ellipsis"
f1, f2 = lambda: ..., lambda: ...
self.assertIs(f1.__code__, f2.__code__)
self.assertIs(f1.__code__.co_consts, f2.__code__.co_consts)
self.check_constant(f1, Ellipsis)
self.assertEqual(repr(f1()), repr(Ellipsis))

Expand All @@ -641,7 +641,7 @@ def check_same_constant(const):
# {0} is converted to a constant frozenset({0}) by the peephole
# optimizer
f1, f2 = lambda x: x in {0}, lambda x: x in {0}
self.assertIs(f1.__code__, f2.__code__)
self.assertIs(f1.__code__.co_consts, f2.__code__.co_consts)
self.check_constant(f1, frozenset({0}))
self.assertTrue(f1(0))

Expand Down Expand Up @@ -1300,6 +1300,27 @@ def f():
self.assertIsNotNone(end_column)
self.assertLessEqual((line, column), (end_line, end_column))

@support.cpython_only
def test_column_offset_deduplication(self):
# GH-95150: Code with different column offsets shouldn't be merged!
for source in [
"lambda: a",
"(a for b in c)",
"[a for b in c]",
"{a for b in c}",
"{a: b for c in d}",
]:
with self.subTest(source):
code = compile(f"{source}, {source}", "<test>", "eval")
self.assertEqual(len(code.co_consts), 2)
self.assertIsInstance(code.co_consts[0], types.CodeType)
self.assertIsInstance(code.co_consts[1], types.CodeType)
self.assertNotEqual(code.co_consts[0], code.co_consts[1])
self.assertNotEqual(
list(code.co_consts[0].co_positions()),
list(code.co_consts[1].co_positions()),
)


class TestExpressionStackSize(unittest.TestCase):
# These tests check that the computed stack size for a code object
Expand Down
3 changes: 2 additions & 1 deletion Lib/test/test_syntax.py
Original file line number Diff line number Diff line change
Expand Up @@ -2012,7 +2012,8 @@ def fib(n):
a, b = 0, 1
"""
try:
self.assertEqual(compile(s1, '<string>', 'exec'), compile(s2, '<string>', 'exec'))
compile(s1, '<string>', 'exec')
compile(s2, '<string>', 'exec')
except SyntaxError:
self.fail("Indented statement over multiple lines is valid")

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
Update code object hashing and equality to consider all debugging and
exception handling tables. This fixes an issue where certain non-identical
code objects could be "deduplicated" during compilation.
19 changes: 18 additions & 1 deletion Objects/codeobject.c
Original file line number Diff line number Diff line change
Expand Up @@ -1695,6 +1695,15 @@ code_richcompare(PyObject *self, PyObject *other, int op)
eq = PyObject_RichCompareBool(co->co_localsplusnames,
cp->co_localsplusnames, Py_EQ);
if (eq <= 0) goto unequal;
eq = PyObject_RichCompareBool(co->co_linetable, cp->co_linetable, Py_EQ);
if (eq <= 0) {
goto unequal;
}
eq = PyObject_RichCompareBool(co->co_exceptiontable,
cp->co_exceptiontable, Py_EQ);
if (eq <= 0) {
goto unequal;
}

if (op == Py_EQ)
res = Py_True;
Expand Down Expand Up @@ -1727,7 +1736,15 @@ code_hash(PyCodeObject *co)
if (h2 == -1) return -1;
h3 = PyObject_Hash(co->co_localsplusnames);
if (h3 == -1) return -1;
h = h0 ^ h1 ^ h2 ^ h3 ^
Py_hash_t h4 = PyObject_Hash(co->co_linetable);
if (h4 == -1) {
return -1;
}
Py_hash_t h5 = PyObject_Hash(co->co_exceptiontable);
if (h5 == -1) {
return -1;
}
h = h0 ^ h1 ^ h2 ^ h3 ^ h4 ^ h5 ^
co->co_argcount ^ co->co_posonlyargcount ^ co->co_kwonlyargcount ^
co->co_flags;
if (h == -1) h = -2;
Expand Down