Skip to content

Commit 72c260c

Browse files
authored
[3.10] bpo-46006: Revert "bpo-40521: Per-interpreter interned strings (GH-20085)" (GH-30422) (GH-30425)
This reverts commit ea25180. Keep "assert(interned == NULL);" in _PyUnicode_Fini(), but only for the main interpreter. Keep the _PyUnicode_ClearInterned() changes that avoid creating a temporary Python list object. Leave the PyInterpreterState structure layout unchanged to keep ABI backward compatibility with Python 3.10.0: the "interned" member is only renamed to "unused_interned". (cherry picked from commit 35d6540)
1 parent 861a9aa commit 72c260c

File tree

4 files changed

+76
-26
lines changed

4 files changed

+76
-26
lines changed

Include/internal/pycore_interp.h

+3-9
Original file line numberDiff line numberDiff line change
@@ -71,15 +71,9 @@ struct _Py_unicode_state {
7171
PyObject *latin1[256];
7272
struct _Py_unicode_fs_codec fs_codec;
7373

74-
/* This dictionary holds all interned unicode strings. Note that references
75-
to strings in this dictionary are *not* counted in the string's ob_refcnt.
76-
When the interned string reaches a refcnt of 0 the string deallocation
77-
function will delete the reference from this dictionary.
78-
79-
Another way to look at this is that to say that the actual reference
80-
count of a string is: s->ob_refcnt + (s->state ? 2 : 0)
81-
*/
82-
PyObject *interned;
74+
// Unused member kept for ABI backward compatibility with Python 3.10.0:
75+
// see bpo-46006.
76+
PyObject *unused_interned;
8377

8478
// Unicode identifiers (_Py_Identifier): see _PyUnicode_FromId()
8579
struct _Py_unicode_ids ids;
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
Fix a regression when a type method like ``__init__()`` is modified in a
2+
subinterpreter. Fix a regression in ``_PyUnicode_EqualToASCIIId()`` and in the type
3+
``update_slot()`` function. Revert the change which made the Unicode dictionary of
4+
interned strings compatible with subinterpreters: the internal interned
5+
dictionary is shared again by all interpreters. Patch by Victor Stinner.

Objects/typeobject.c

+22
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,11 @@ typedef struct PySlot_Offset {
5050
} PySlot_Offset;
5151

5252

53+
/* bpo-40521: Interned strings are shared by all interpreters. */
54+
#ifndef EXPERIMENTAL_ISOLATED_SUBINTERPRETERS
55+
# define INTERN_NAME_STRINGS
56+
#endif
57+
5358
/* alphabetical order */
5459
_Py_IDENTIFIER(__abstractmethods__);
5560
_Py_IDENTIFIER(__annotations__);
@@ -3988,6 +3993,7 @@ type_setattro(PyTypeObject *type, PyObject *name, PyObject *value)
39883993
if (name == NULL)
39893994
return -1;
39903995
}
3996+
#ifdef INTERN_NAME_STRINGS
39913997
if (!PyUnicode_CHECK_INTERNED(name)) {
39923998
PyUnicode_InternInPlace(&name);
39933999
if (!PyUnicode_CHECK_INTERNED(name)) {
@@ -3997,6 +4003,7 @@ type_setattro(PyTypeObject *type, PyObject *name, PyObject *value)
39974003
return -1;
39984004
}
39994005
}
4006+
#endif
40004007
}
40014008
else {
40024009
/* Will fail in _PyObject_GenericSetAttrWithDict. */
@@ -8344,10 +8351,17 @@ _PyTypes_InitSlotDefs(void)
83448351
for (slotdef *p = slotdefs; p->name; p++) {
83458352
/* Slots must be ordered by their offset in the PyHeapTypeObject. */
83468353
assert(!p[1].name || p->offset <= p[1].offset);
8354+
#ifdef INTERN_NAME_STRINGS
83478355
p->name_strobj = PyUnicode_InternFromString(p->name);
83488356
if (!p->name_strobj || !PyUnicode_CHECK_INTERNED(p->name_strobj)) {
83498357
return _PyStatus_NO_MEMORY();
83508358
}
8359+
#else
8360+
p->name_strobj = PyUnicode_FromString(p->name);
8361+
if (!p->name_strobj) {
8362+
return _PyStatus_NO_MEMORY();
8363+
}
8364+
#endif
83518365
}
83528366
slotdefs_initialized = 1;
83538367
return _PyStatus_OK();
@@ -8372,16 +8386,24 @@ update_slot(PyTypeObject *type, PyObject *name)
83728386
int offset;
83738387

83748388
assert(PyUnicode_CheckExact(name));
8389+
#ifdef INTERN_NAME_STRINGS
83758390
assert(PyUnicode_CHECK_INTERNED(name));
8391+
#endif
83768392

83778393
assert(slotdefs_initialized);
83788394
pp = ptrs;
83798395
for (p = slotdefs; p->name; p++) {
83808396
assert(PyUnicode_CheckExact(p->name_strobj));
83818397
assert(PyUnicode_CheckExact(name));
8398+
#ifdef INTERN_NAME_STRINGS
83828399
if (p->name_strobj == name) {
83838400
*pp++ = p;
83848401
}
8402+
#else
8403+
if (p->name_strobj == name || _PyUnicode_EQ(p->name_strobj, name)) {
8404+
*pp++ = p;
8405+
}
8406+
#endif
83858407
}
83868408
*pp = NULL;
83878409
for (pp = ptrs; *pp; pp++) {

Objects/unicodeobject.c

+46-17
Original file line numberDiff line numberDiff line change
@@ -211,6 +211,22 @@ extern "C" {
211211
# define OVERALLOCATE_FACTOR 4
212212
#endif
213213

214+
/* bpo-40521: Interned strings are shared by all interpreters. */
215+
#ifndef EXPERIMENTAL_ISOLATED_SUBINTERPRETERS
216+
# define INTERNED_STRINGS
217+
#endif
218+
219+
/* This dictionary holds all interned unicode strings. Note that references
220+
to strings in this dictionary are *not* counted in the string's ob_refcnt.
221+
When the interned string reaches a refcnt of 0 the string deallocation
222+
function will delete the reference from this dictionary.
223+
224+
Another way to look at this is to say that the actual reference
225+
count of a string is: s->ob_refcnt + (s->state ? 2 : 0)
226+
*/
227+
#ifdef INTERNED_STRINGS
228+
static PyObject *interned = NULL;
229+
#endif
214230

215231
static struct _Py_unicode_state*
216232
get_unicode_state(void)
@@ -1936,20 +1952,21 @@ unicode_dealloc(PyObject *unicode)
19361952

19371953
case SSTATE_INTERNED_MORTAL:
19381954
{
1939-
struct _Py_unicode_state *state = get_unicode_state();
1955+
#ifdef INTERNED_STRINGS
19401956
/* Revive the dead object temporarily. PyDict_DelItem() removes two
19411957
references (key and value) which were ignored by
19421958
PyUnicode_InternInPlace(). Use refcnt=3 rather than refcnt=2
19431959
to prevent calling unicode_dealloc() again. Adjust refcnt after
19441960
PyDict_DelItem(). */
19451961
assert(Py_REFCNT(unicode) == 0);
19461962
Py_SET_REFCNT(unicode, 3);
1947-
if (PyDict_DelItem(state->interned, unicode) != 0) {
1963+
if (PyDict_DelItem(interned, unicode) != 0) {
19481964
_PyErr_WriteUnraisableMsg("deletion of interned string failed",
19491965
NULL);
19501966
}
19511967
assert(Py_REFCNT(unicode) == 1);
19521968
Py_SET_REFCNT(unicode, 0);
1969+
#endif
19531970
break;
19541971
}
19551972

@@ -11600,11 +11617,13 @@ _PyUnicode_EqualToASCIIId(PyObject *left, _Py_Identifier *right)
1160011617
if (PyUnicode_CHECK_INTERNED(left))
1160111618
return 0;
1160211619

11620+
#ifdef INTERNED_STRINGS
1160311621
assert(_PyUnicode_HASH(right_uni) != -1);
1160411622
Py_hash_t hash = _PyUnicode_HASH(left);
1160511623
if (hash != -1 && hash != _PyUnicode_HASH(right_uni)) {
1160611624
return 0;
1160711625
}
11626+
#endif
1160811627

1160911628
return unicode_compare_eq(left, right_uni);
1161011629
}
@@ -15833,21 +15852,21 @@ PyUnicode_InternInPlace(PyObject **p)
1583315852
return;
1583415853
}
1583515854

15855+
#ifdef INTERNED_STRINGS
1583615856
if (PyUnicode_READY(s) == -1) {
1583715857
PyErr_Clear();
1583815858
return;
1583915859
}
1584015860

15841-
struct _Py_unicode_state *state = get_unicode_state();
15842-
if (state->interned == NULL) {
15843-
state->interned = PyDict_New();
15844-
if (state->interned == NULL) {
15861+
if (interned == NULL) {
15862+
interned = PyDict_New();
15863+
if (interned == NULL) {
1584515864
PyErr_Clear(); /* Don't leave an exception */
1584615865
return;
1584715866
}
1584815867
}
1584915868

15850-
PyObject *t = PyDict_SetDefault(state->interned, s, s);
15869+
PyObject *t = PyDict_SetDefault(interned, s, s);
1585115870
if (t == NULL) {
1585215871
PyErr_Clear();
1585315872
return;
@@ -15864,9 +15883,13 @@ PyUnicode_InternInPlace(PyObject **p)
1586415883
this. */
1586515884
Py_SET_REFCNT(s, Py_REFCNT(s) - 2);
1586615885
_PyUnicode_STATE(s).interned = SSTATE_INTERNED_MORTAL;
15886+
#else
15887+
// PyDict expects that interned strings have their hash
15888+
// (PyASCIIObject.hash) already computed.
15889+
(void)unicode_hash(s);
15890+
#endif
1586715891
}
1586815892

15869-
1587015893
void
1587115894
PyUnicode_InternImmortal(PyObject **p)
1587215895
{
@@ -15900,25 +15923,29 @@ PyUnicode_InternFromString(const char *cp)
1590015923
void
1590115924
_PyUnicode_ClearInterned(PyInterpreterState *interp)
1590215925
{
15903-
struct _Py_unicode_state *state = &interp->unicode;
15904-
if (state->interned == NULL) {
15926+
if (!_Py_IsMainInterpreter(interp)) {
15927+
// interned dict is shared by all interpreters
15928+
return;
15929+
}
15930+
15931+
if (interned == NULL) {
1590515932
return;
1590615933
}
15907-
assert(PyDict_CheckExact(state->interned));
15934+
assert(PyDict_CheckExact(interned));
1590815935

1590915936
/* Interned unicode strings are not forcibly deallocated; rather, we give
1591015937
them their stolen references back, and then clear and DECREF the
1591115938
interned dict. */
1591215939

1591315940
#ifdef INTERNED_STATS
1591415941
fprintf(stderr, "releasing %zd interned strings\n",
15915-
PyDict_GET_SIZE(state->interned));
15942+
PyDict_GET_SIZE(interned));
1591615943

1591715944
Py_ssize_t immortal_size = 0, mortal_size = 0;
1591815945
#endif
1591915946
Py_ssize_t pos = 0;
1592015947
PyObject *s, *ignored_value;
15921-
while (PyDict_Next(state->interned, &pos, &s, &ignored_value)) {
15948+
while (PyDict_Next(interned, &pos, &s, &ignored_value)) {
1592215949
assert(PyUnicode_IS_READY(s));
1592315950

1592415951
switch (PyUnicode_CHECK_INTERNED(s)) {
@@ -15949,8 +15976,8 @@ _PyUnicode_ClearInterned(PyInterpreterState *interp)
1594915976
mortal_size, immortal_size);
1595015977
#endif
1595115978

15952-
PyDict_Clear(state->interned);
15953-
Py_CLEAR(state->interned);
15979+
PyDict_Clear(interned);
15980+
Py_CLEAR(interned);
1595415981
}
1595515982

1595615983

@@ -16322,8 +16349,10 @@ _PyUnicode_Fini(PyInterpreterState *interp)
1632216349
{
1632316350
struct _Py_unicode_state *state = &interp->unicode;
1632416351

16325-
// _PyUnicode_ClearInterned() must be called before
16326-
assert(state->interned == NULL);
16352+
if (_Py_IsMainInterpreter(interp)) {
16353+
// _PyUnicode_ClearInterned() must be called before _PyUnicode_Fini()
16354+
assert(interned == NULL);
16355+
}
1632716356

1632816357
_PyUnicode_FiniEncodings(&state->fs_codec);
1632916358

0 commit comments

Comments
 (0)