Skip to content

Commit 35d6540

Browse files
authored
bpo-46006: Revert "bpo-40521: Per-interpreter interned strings (GH-20085)" (GH-30422)
This reverts commit ea25180. Keep "assert(interned == NULL);" in _PyUnicode_Fini(), but only for the main interpreter. Also keep the _PyUnicode_ClearInterned() changes that avoid creating a temporary Python list object.
1 parent e5894ca commit 35d6540

File tree

4 files changed

+75
-30
lines changed

4 files changed

+75
-30
lines changed

Include/internal/pycore_unicodeobject.h

+1-11
Original file line numberDiff line numberDiff line change
@@ -48,21 +48,11 @@ struct _Py_unicode_state {
4848
PyObject *latin1[256];
4949
struct _Py_unicode_fs_codec fs_codec;
5050

51-
/* This dictionary holds all interned unicode strings. Note that references
52-
to strings in this dictionary are *not* counted in the string's ob_refcnt.
53-
When the interned string reaches a refcnt of 0 the string deallocation
54-
function will delete the reference from this dictionary.
55-
56-
Another way to look at this is that to say that the actual reference
57-
count of a string is: s->ob_refcnt + (s->state ? 2 : 0)
58-
*/
59-
PyObject *interned;
60-
6151
// Unicode identifiers (_Py_Identifier): see _PyUnicode_FromId()
6252
struct _Py_unicode_ids ids;
6353
};
6454

65-
extern void _PyUnicode_ClearInterned(PyInterpreterState *);
55+
extern void _PyUnicode_ClearInterned(PyInterpreterState *interp);
6656

6757

6858
#ifdef __cplusplus
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
Fix a regression when a type method like ``__init__()`` is modified in a
2+
subinterpreter. Fix a regression in ``_PyUnicode_EqualToASCIIId()`` and type
3+
``update_slot()``. Revert the change which made the Unicode dictionary of
4+
interned strings compatible with subinterpreters: the internal interned
5+
dictionary is shared again by all interpreters. Patch by Victor Stinner.

Objects/typeobject.c

+22
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,11 @@ typedef struct PySlot_Offset {
5454
} PySlot_Offset;
5555

5656

57+
/* bpo-40521: Interned strings are shared by all interpreters. */
58+
#ifndef EXPERIMENTAL_ISOLATED_SUBINTERPRETERS
59+
# define INTERN_NAME_STRINGS
60+
#endif
61+
5762
/* alphabetical order */
5863
_Py_IDENTIFIER(__abstractmethods__);
5964
_Py_IDENTIFIER(__annotations__);
@@ -4028,6 +4033,7 @@ type_setattro(PyTypeObject *type, PyObject *name, PyObject *value)
40284033
if (name == NULL)
40294034
return -1;
40304035
}
4036+
#ifdef INTERN_NAME_STRINGS
40314037
if (!PyUnicode_CHECK_INTERNED(name)) {
40324038
PyUnicode_InternInPlace(&name);
40334039
if (!PyUnicode_CHECK_INTERNED(name)) {
@@ -4037,6 +4043,7 @@ type_setattro(PyTypeObject *type, PyObject *name, PyObject *value)
40374043
return -1;
40384044
}
40394045
}
4046+
#endif
40404047
}
40414048
else {
40424049
/* Will fail in _PyObject_GenericSetAttrWithDict. */
@@ -8424,10 +8431,17 @@ _PyTypes_InitSlotDefs(void)
84248431
for (slotdef *p = slotdefs; p->name; p++) {
84258432
/* Slots must be ordered by their offset in the PyHeapTypeObject. */
84268433
assert(!p[1].name || p->offset <= p[1].offset);
8434+
#ifdef INTERN_NAME_STRINGS
84278435
p->name_strobj = PyUnicode_InternFromString(p->name);
84288436
if (!p->name_strobj || !PyUnicode_CHECK_INTERNED(p->name_strobj)) {
84298437
return _PyStatus_NO_MEMORY();
84308438
}
8439+
#else
8440+
p->name_strobj = PyUnicode_FromString(p->name);
8441+
if (!p->name_strobj) {
8442+
return _PyStatus_NO_MEMORY();
8443+
}
8444+
#endif
84318445
}
84328446
slotdefs_initialized = 1;
84338447
return _PyStatus_OK();
@@ -8452,16 +8466,24 @@ update_slot(PyTypeObject *type, PyObject *name)
84528466
int offset;
84538467

84548468
assert(PyUnicode_CheckExact(name));
8469+
#ifdef INTERN_NAME_STRINGS
84558470
assert(PyUnicode_CHECK_INTERNED(name));
8471+
#endif
84568472

84578473
assert(slotdefs_initialized);
84588474
pp = ptrs;
84598475
for (p = slotdefs; p->name; p++) {
84608476
assert(PyUnicode_CheckExact(p->name_strobj));
84618477
assert(PyUnicode_CheckExact(name));
8478+
#ifdef INTERN_NAME_STRINGS
84628479
if (p->name_strobj == name) {
84638480
*pp++ = p;
84648481
}
8482+
#else
8483+
if (p->name_strobj == name || _PyUnicode_EQ(p->name_strobj, name)) {
8484+
*pp++ = p;
8485+
}
8486+
#endif
84658487
}
84668488
*pp = NULL;
84678489
for (pp = ptrs; *pp; pp++) {

Objects/unicodeobject.c

+47-19
Original file line numberDiff line numberDiff line change
@@ -214,6 +214,22 @@ extern "C" {
214214
# define OVERALLOCATE_FACTOR 4
215215
#endif
216216

217+
/* bpo-40521: Interned strings are shared by all interpreters. */
218+
#ifndef EXPERIMENTAL_ISOLATED_SUBINTERPRETERS
219+
# define INTERNED_STRINGS
220+
#endif
221+
222+
/* This dictionary holds all interned unicode strings. Note that references
223+
to strings in this dictionary are *not* counted in the string's ob_refcnt.
224+
When the interned string reaches a refcnt of 0 the string deallocation
225+
function will delete the reference from this dictionary.
226+
227+
Another way to look at this is to say that the actual reference
228+
count of a string is: s->ob_refcnt + (s->state ? 2 : 0)
229+
*/
230+
#ifdef INTERNED_STRINGS
231+
static PyObject *interned = NULL;
232+
#endif
217233

218234
/* Forward declaration */
219235
static inline int
@@ -1950,20 +1966,21 @@ unicode_dealloc(PyObject *unicode)
19501966

19511967
case SSTATE_INTERNED_MORTAL:
19521968
{
1953-
struct _Py_unicode_state *state = get_unicode_state();
1969+
#ifdef INTERNED_STRINGS
19541970
/* Revive the dead object temporarily. PyDict_DelItem() removes two
19551971
references (key and value) which were ignored by
19561972
PyUnicode_InternInPlace(). Use refcnt=3 rather than refcnt=2
19571973
to prevent calling unicode_dealloc() again. Adjust refcnt after
19581974
PyDict_DelItem(). */
19591975
assert(Py_REFCNT(unicode) == 0);
19601976
Py_SET_REFCNT(unicode, 3);
1961-
if (PyDict_DelItem(state->interned, unicode) != 0) {
1977+
if (PyDict_DelItem(interned, unicode) != 0) {
19621978
_PyErr_WriteUnraisableMsg("deletion of interned string failed",
19631979
NULL);
19641980
}
19651981
assert(Py_REFCNT(unicode) == 1);
19661982
Py_SET_REFCNT(unicode, 0);
1983+
#endif
19671984
break;
19681985
}
19691986

@@ -11342,11 +11359,13 @@ _PyUnicode_EqualToASCIIId(PyObject *left, _Py_Identifier *right)
1134211359
if (PyUnicode_CHECK_INTERNED(left))
1134311360
return 0;
1134411361

11362+
#ifdef INTERNED_STRINGS
1134511363
assert(_PyUnicode_HASH(right_uni) != -1);
1134611364
Py_hash_t hash = _PyUnicode_HASH(left);
1134711365
if (hash != -1 && hash != _PyUnicode_HASH(right_uni)) {
1134811366
return 0;
1134911367
}
11368+
#endif
1135011369

1135111370
return unicode_compare_eq(left, right_uni);
1135211371
}
@@ -15591,21 +15610,21 @@ PyUnicode_InternInPlace(PyObject **p)
1559115610
return;
1559215611
}
1559315612

15613+
#ifdef INTERNED_STRINGS
1559415614
if (PyUnicode_READY(s) == -1) {
1559515615
PyErr_Clear();
1559615616
return;
1559715617
}
1559815618

15599-
struct _Py_unicode_state *state = get_unicode_state();
15600-
if (state->interned == NULL) {
15601-
state->interned = PyDict_New();
15602-
if (state->interned == NULL) {
15619+
if (interned == NULL) {
15620+
interned = PyDict_New();
15621+
if (interned == NULL) {
1560315622
PyErr_Clear(); /* Don't leave an exception */
1560415623
return;
1560515624
}
1560615625
}
1560715626

15608-
PyObject *t = PyDict_SetDefault(state->interned, s, s);
15627+
PyObject *t = PyDict_SetDefault(interned, s, s);
1560915628
if (t == NULL) {
1561015629
PyErr_Clear();
1561115630
return;
@@ -15622,9 +15641,13 @@ PyUnicode_InternInPlace(PyObject **p)
1562215641
this. */
1562315642
Py_SET_REFCNT(s, Py_REFCNT(s) - 2);
1562415643
_PyUnicode_STATE(s).interned = SSTATE_INTERNED_MORTAL;
15644+
#else
15645+
// PyDict expects that interned strings have their hash
15646+
// (PyASCIIObject.hash) already computed.
15647+
(void)unicode_hash(s);
15648+
#endif
1562515649
}
1562615650

15627-
1562815651
void
1562915652
PyUnicode_InternImmortal(PyObject **p)
1563015653
{
@@ -15658,25 +15681,29 @@ PyUnicode_InternFromString(const char *cp)
1565815681
void
1565915682
_PyUnicode_ClearInterned(PyInterpreterState *interp)
1566015683
{
15661-
struct _Py_unicode_state *state = &interp->unicode;
15662-
if (state->interned == NULL) {
15684+
if (!_Py_IsMainInterpreter(interp)) {
15685+
// interned dict is shared by all interpreters
1566315686
return;
1566415687
}
15665-
assert(PyDict_CheckExact(state->interned));
15688+
15689+
if (interned == NULL) {
15690+
return;
15691+
}
15692+
assert(PyDict_CheckExact(interned));
1566615693

1566715694
/* Interned unicode strings are not forcibly deallocated; rather, we give
1566815695
them their stolen references back, and then clear and DECREF the
1566915696
interned dict. */
1567015697

1567115698
#ifdef INTERNED_STATS
1567215699
fprintf(stderr, "releasing %zd interned strings\n",
15673-
PyDict_GET_SIZE(state->interned));
15700+
PyDict_GET_SIZE(interned));
1567415701

1567515702
Py_ssize_t immortal_size = 0, mortal_size = 0;
1567615703
#endif
1567715704
Py_ssize_t pos = 0;
1567815705
PyObject *s, *ignored_value;
15679-
while (PyDict_Next(state->interned, &pos, &s, &ignored_value)) {
15706+
while (PyDict_Next(interned, &pos, &s, &ignored_value)) {
1568015707
assert(PyUnicode_IS_READY(s));
1568115708

1568215709
switch (PyUnicode_CHECK_INTERNED(s)) {
@@ -15707,8 +15734,8 @@ _PyUnicode_ClearInterned(PyInterpreterState *interp)
1570715734
mortal_size, immortal_size);
1570815735
#endif
1570915736

15710-
PyDict_Clear(state->interned);
15711-
Py_CLEAR(state->interned);
15737+
PyDict_Clear(interned);
15738+
Py_CLEAR(interned);
1571215739
}
1571315740

1571415741

@@ -16079,8 +16106,7 @@ _PyUnicode_EnableLegacyWindowsFSEncoding(void)
1607916106
static inline int
1608016107
unicode_is_finalizing(void)
1608116108
{
16082-
struct _Py_unicode_state *state = get_unicode_state();
16083-
return (state->interned == NULL);
16109+
return (interned == NULL);
1608416110
}
1608516111
#endif
1608616112

@@ -16090,8 +16116,10 @@ _PyUnicode_Fini(PyInterpreterState *interp)
1609016116
{
1609116117
struct _Py_unicode_state *state = &interp->unicode;
1609216118

16093-
// _PyUnicode_ClearInterned() must be called before
16094-
assert(state->interned == NULL);
16119+
if (_Py_IsMainInterpreter(interp)) {
16120+
// _PyUnicode_ClearInterned() must be called before _PyUnicode_Fini()
16121+
assert(interned == NULL);
16122+
}
1609516123

1609616124
_PyUnicode_FiniEncodings(&state->fs_codec);
1609716125

0 commit comments

Comments
 (0)