From 41c1f59f2309170f7e6cb350c83218597cb9724a Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Wed, 15 Dec 2021 15:26:17 +0100 Subject: [PATCH] bpo-46006: Fix _PyUnicode_EqualToASCIIId() for interned strings Fix string comparisons for subinterpreters: no longer make the assumption that two interned strings are not equal if they have different memory addresses. This assumption is no longer true since interned strings have been made per interpreter in Python 3.10. Fix _PyUnicode_EqualToASCIIId() and type update_slot() functions. The change makes update_slot() a little bit less efficient: update_slot() now compares the strings contents, rather than comparing strings memory addresses. --- .../2021-12-15-15-30-04.bpo-46006.T4pbJR.rst | 4 ++++ Objects/typeobject.c | 5 ++++- Objects/unicodeobject.c | 15 +++++++++++---- 3 files changed, 19 insertions(+), 5 deletions(-) create mode 100644 Misc/NEWS.d/next/Core and Builtins/2021-12-15-15-30-04.bpo-46006.T4pbJR.rst diff --git a/Misc/NEWS.d/next/Core and Builtins/2021-12-15-15-30-04.bpo-46006.T4pbJR.rst b/Misc/NEWS.d/next/Core and Builtins/2021-12-15-15-30-04.bpo-46006.T4pbJR.rst new file mode 100644 index 00000000000000..db03957dca2e02 --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2021-12-15-15-30-04.bpo-46006.T4pbJR.rst @@ -0,0 +1,4 @@ +Fix string comparisons for subinterpreters: no longer make the assumption +that two interned strings are not equal if they have different +memory addresses. This assumption is no longer true since interned strings have +been made per interpreter in Python 3.10. Patch by Victor Stinner. 
diff --git a/Objects/typeobject.c b/Objects/typeobject.c index af35180cdb9831..242d75ede6fc1a 100644 --- a/Objects/typeobject.c +++ b/Objects/typeobject.c @@ -8459,7 +8459,10 @@ update_slot(PyTypeObject *type, PyObject *name) for (p = slotdefs; p->name; p++) { assert(PyUnicode_CheckExact(p->name_strobj)); assert(PyUnicode_CheckExact(name)); - if (p->name_strobj == name) { + // bpo-46006: subinterpreters require to compare strings contents, even + // if both strings are interned. _PyUnicode_EQ() is required to keep + // support for built-in static types in subinterpreters. + if (p->name_strobj == name || _PyUnicode_EQ(p->name_strobj, name)) { *pp++ = p; } } diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 14449bce70839f..bac9faea89bfbd 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -11336,11 +11336,18 @@ _PyUnicode_EqualToASCIIId(PyObject *left, _Py_Identifier *right) return _PyUnicode_EqualToASCIIString(left, right->string); } - if (left == right_uni) + if (left == right_uni) { return 1; - - if (PyUnicode_CHECK_INTERNED(left)) - return 0; + } + // bpo-46006: The left string cannot be considered as not equal to the + // right string if the left string is interned, because the two string + // objects can belong to two interpreters. + // + // While an interpreter is supposed to only access objects that it created + // (bpo-40533), in practice in Python 3.11, it remains common that a + // subinterpreter accesses objects of the main interpreter. For example, + // access attribute names (strings) of static types created by the main + // interpreter. assert(_PyUnicode_HASH(right_uni) != -1); Py_hash_t hash = _PyUnicode_HASH(left);