From cf54090f7d87f62d9b9834fa78558406d498bee1 Mon Sep 17 00:00:00 2001 From: Brandt Bucher Date: Sun, 31 Jul 2022 12:53:22 -0700 Subject: [PATCH 1/5] Add failing regression tests --- Lib/test/test_code.py | 21 +++++++++++++++++++++ Lib/test/test_compile.py | 27 ++++++++++++++++++++++++--- Lib/test/test_syntax.py | 3 ++- 3 files changed, 47 insertions(+), 4 deletions(-) diff --git a/Lib/test/test_code.py b/Lib/test/test_code.py index fd68f6dee7915a..2386cf6b59f396 100644 --- a/Lib/test/test_code.py +++ b/Lib/test/test_code.py @@ -428,6 +428,27 @@ def func(): self.assertIsNone(line) self.assertEqual(end_line, new_code.co_firstlineno + 1) + def test_code_equality(self): + def f(): + try: + a() + except: + b() + else: + c() + finally: + d() + code_a = f.__code__ + code_b = code_a.replace(co_linetable=b"") + code_c = code_a.replace(co_exceptiontable=b"") + code_d = code_b.replace(co_exceptiontable=b"") + self.assertNotEqual(code_a, code_b) + self.assertNotEqual(code_a, code_c) + self.assertNotEqual(code_a, code_d) + self.assertNotEqual(code_b, code_c) + self.assertNotEqual(code_b, code_d) + self.assertNotEqual(code_c, code_d) + def isinterned(s): return s is sys.intern(('_' + s + '_')[1:-1]) diff --git a/Lib/test/test_compile.py b/Lib/test/test_compile.py index 11940bec492d89..d4cc86a3e75f61 100644 --- a/Lib/test/test_compile.py +++ b/Lib/test/test_compile.py @@ -613,7 +613,7 @@ def check_same_constant(const): exec(code, ns) f1 = ns['f1'] f2 = ns['f2'] - self.assertIs(f1.__code__, f2.__code__) + self.assertEqual(f1.__code__.co_consts, f2.__code__.co_consts) self.check_constant(f1, const) self.assertEqual(repr(f1()), repr(const)) @@ -626,7 +626,7 @@ def check_same_constant(const): # Note: "lambda: ..." emits "LOAD_CONST Ellipsis", # whereas "lambda: Ellipsis" emits "LOAD_GLOBAL Ellipsis" f1, f2 = lambda: ..., lambda: ... - self.assertIs(f1.__code__, f2.__code__) + self.assertEqual(f1.__code__.co_consts, f2.__code__.co_consts) self.check_constant(f1, Ellipsis) self.assertEqual(repr(f1()), repr(Ellipsis)) @@ -641,7 +641,7 @@ def check_same_constant(const): # {0} is converted to a constant frozenset({0}) by the peephole # optimizer f1, f2 = lambda x: x in {0}, lambda x: x in {0} - self.assertIs(f1.__code__, f2.__code__) + self.assertEqual(f1.__code__.co_consts, f2.__code__.co_consts) self.check_constant(f1, frozenset({0})) self.assertTrue(f1(0)) @@ -1300,6 +1300,27 @@ def f(): self.assertIsNotNone(end_column) self.assertLessEqual((line, column), (end_line, end_column)) + @support.cpython_only + def test_column_offset_deduplication(self): + # GH-95150: Code with different column offsets shouldn't be merged! + for source in [ + "(a for b in c), (a for b in c)", + "[a for b in c], [a for b in c]", + "{a for b in c}, {a for b in c}", + "{a: b for c in d}, {a: b for c in d}", + "lambda: a, lambda: a", + ]: + with self.subTest(source): + code = compile(source, "", "eval") + self.assertEqual(len(code.co_consts), 2) + self.assertIsInstance(code.co_consts[0], types.CodeType) + self.assertIsInstance(code.co_consts[1], types.CodeType) + self.assertNotEqual(code.co_consts[0], code.co_consts[1]) + self.assertNotEqual( + list(code.co_consts[0].co_positions()), + list(code.co_consts[1].co_positions()), + ) + class TestExpressionStackSize(unittest.TestCase): # These tests check that the computed stack size for a code object diff --git a/Lib/test/test_syntax.py b/Lib/test/test_syntax.py index b22a96b20298dd..ae1066924b3cf5 100644 --- a/Lib/test/test_syntax.py +++ b/Lib/test/test_syntax.py @@ -2012,7 +2012,8 @@ def fib(n): a, b = 0, 1 """ try: - self.assertEqual(compile(s1, '', 'exec'), compile(s2, '', 'exec')) + compile(s1, '', 'exec') + compile(s2, '', 'exec') except SyntaxError: self.fail("Indented statement over multiple lines is valid") From 3022f26f148c2bb45589b622c2a883ed8fc9d7e9 Mon Sep 17 00:00:00 2001 From: Brandt Bucher Date: Sun, 31 Jul 2022 12:53:57 -0700 Subject: [PATCH 2/5] Use line/exception tables for code hash/equality --- Objects/codeobject.c | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/Objects/codeobject.c b/Objects/codeobject.c index 2f757c4d8a9868..7ebbfdbdec18b3 100644 --- a/Objects/codeobject.c +++ b/Objects/codeobject.c @@ -1695,6 +1695,15 @@ code_richcompare(PyObject *self, PyObject *other, int op) eq = PyObject_RichCompareBool(co->co_localsplusnames, cp->co_localsplusnames, Py_EQ); if (eq <= 0) goto unequal; + eq = PyObject_RichCompareBool(co->co_linetable, cp->co_linetable, Py_EQ); + if (eq <= 0) { + goto unequal; + } + eq = PyObject_RichCompareBool(co->co_exceptiontable, + cp->co_exceptiontable, Py_EQ); + if (eq <= 0) { + goto unequal; + } if (op == Py_EQ) res = Py_True; @@ -1727,7 +1736,15 @@ code_hash(PyCodeObject *co) if (h2 == -1) return -1; h3 = PyObject_Hash(co->co_localsplusnames); if (h3 == -1) return -1; - h = h0 ^ h1 ^ h2 ^ h3 ^ + Py_hash_t h4 = PyObject_Hash(co->co_linetable); + if (h4 == -1) { + return -1; + } + Py_hash_t h5 = PyObject_Hash(co->co_exceptiontable); + if (h5 == -1) { + return -1; + } + h = h0 ^ h1 ^ h2 ^ h3 ^ h4 ^ h5 ^ co->co_argcount ^ co->co_posonlyargcount ^ co->co_kwonlyargcount ^ co->co_flags; if (h == -1) h = -2; From ab9a09e9e146de74ac3f5c978124ce1be1a9f91a Mon Sep 17 00:00:00 2001 From: Brandt Bucher Date: Sun, 31 Jul 2022 13:23:33 -0700 Subject: [PATCH 3/5] blurb add --- .../2022-07-31-13-23-12.gh-issue-95150.67FXVo.rst | 3 +++ 1 file changed, 3 insertions(+) create mode 100644 Misc/NEWS.d/next/Core and Builtins/2022-07-31-13-23-12.gh-issue-95150.67FXVo.rst diff --git a/Misc/NEWS.d/next/Core and Builtins/2022-07-31-13-23-12.gh-issue-95150.67FXVo.rst b/Misc/NEWS.d/next/Core and Builtins/2022-07-31-13-23-12.gh-issue-95150.67FXVo.rst new file mode 100644 index 00000000000000..c3db4714188b3f --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2022-07-31-13-23-12.gh-issue-95150.67FXVo.rst @@ -0,0 +1,3 @@ +Update code object hashing and equality to consider all debugging and +exception handling tables. This fixes an issue where certain non-identical +code objects could be "deduplicated" during compilation. From aecca512f8260f0f8ce0e4786da133e2ddc9386a Mon Sep 17 00:00:00 2001 From: Brandt Bucher Date: Sun, 31 Jul 2022 13:49:04 -0700 Subject: [PATCH 4/5] Update tests --- Lib/test/test_compile.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/Lib/test/test_compile.py b/Lib/test/test_compile.py index d4cc86a3e75f61..0178994465f5e6 100644 --- a/Lib/test/test_compile.py +++ b/Lib/test/test_compile.py @@ -613,7 +613,7 @@ def check_same_constant(const): exec(code, ns) f1 = ns['f1'] f2 = ns['f2'] - self.assertEqual(f1.__code__.co_consts, f2.__code__.co_consts) + self.assertIs(f1.__code__.co_consts, f2.__code__.co_consts) self.check_constant(f1, const) self.assertEqual(repr(f1()), repr(const)) @@ -626,7 +626,7 @@ def check_same_constant(const): # Note: "lambda: ..." emits "LOAD_CONST Ellipsis", # whereas "lambda: Ellipsis" emits "LOAD_GLOBAL Ellipsis" f1, f2 = lambda: ..., lambda: ... - self.assertEqual(f1.__code__.co_consts, f2.__code__.co_consts) + self.assertIs(f1.__code__.co_consts, f2.__code__.co_consts) self.check_constant(f1, Ellipsis) self.assertEqual(repr(f1()), repr(Ellipsis)) @@ -641,7 +641,7 @@ def check_same_constant(const): # {0} is converted to a constant frozenset({0}) by the peephole # optimizer f1, f2 = lambda x: x in {0}, lambda x: x in {0} - self.assertEqual(f1.__code__.co_consts, f2.__code__.co_consts) + self.assertIs(f1.__code__.co_consts, f2.__code__.co_consts) self.check_constant(f1, frozenset({0})) self.assertTrue(f1(0)) @@ -1304,14 +1304,14 @@ def f(): def test_column_offset_deduplication(self): # GH-95150: Code with different column offsets shouldn't be merged! for source in [ - "(a for b in c), (a for b in c)", - "[a for b in c], [a for b in c]", - "{a for b in c}, {a for b in c}", - "{a: b for c in d}, {a: b for c in d}", - "lambda: a, lambda: a", + "lambda: a", + "(a for b in c)", + "[a for b in c]", + "{a for b in c}", + "{a: b for c in d}", ]: with self.subTest(source): - code = compile(source, "", "eval") + code = compile(f"{source}, {source}", "", "eval") self.assertEqual(len(code.co_consts), 2) self.assertIsInstance(code.co_consts[0], types.CodeType) self.assertIsInstance(code.co_consts[1], types.CodeType) From 9b3221fd1f592e6b85f887e672ef75b88547f9ca Mon Sep 17 00:00:00 2001 From: Brandt Bucher Date: Sun, 31 Jul 2022 16:06:37 -0700 Subject: [PATCH 5/5] make patchcheck --- Lib/test/test_compile.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Lib/test/test_compile.py b/Lib/test/test_compile.py index 0178994465f5e6..31086ef1560692 100644 --- a/Lib/test/test_compile.py +++ b/Lib/test/test_compile.py @@ -1317,7 +1317,7 @@ def test_column_offset_deduplication(self): self.assertIsInstance(code.co_consts[1], types.CodeType) self.assertNotEqual(code.co_consts[0], code.co_consts[1]) self.assertNotEqual( - list(code.co_consts[0].co_positions()), + list(code.co_consts[0].co_positions()), list(code.co_consts[1].co_positions()), )