Skip to content

Commit ea25180

Browse files
authored
bpo-40521: Per-interpreter interned strings (GH-20085)
Make the Unicode dictionary of interned strings compatible with subinterpreters. Remove the INTERN_NAME_STRINGS macro in typeobject.c: names are now always interned (even if the EXPERIMENTAL_ISOLATED_SUBINTERPRETERS macro is defined). _PyUnicode_ClearInterned() now iterates with PyDict_Next() instead of building a key list with PyDict_Keys(), so it no longer allocates memory; this ensures that the interned dictionary is always cleared.
1 parent 993e88c commit ea25180

File tree

5 files changed

+43
-83
lines changed

5 files changed

+43
-83
lines changed

Include/internal/pycore_interp.h

+11
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,17 @@ struct _Py_unicode_state {
7676
shared as well. */
7777
PyObject *latin1[256];
7878
struct _Py_unicode_fs_codec fs_codec;
79+
80+
/* This dictionary holds all interned unicode strings. Note that references
81+
to strings in this dictionary are *not* counted in the string's ob_refcnt.
82+
When the interned string reaches a refcnt of 0 the string deallocation
83+
function will delete the reference from this dictionary.
84+
85+
Another way to look at this is to say that the actual reference
86+
count of a string is: s->ob_refcnt + (s->state ? 2 : 0)
87+
*/
88+
PyObject *interned;
89+
7990
// Unicode identifiers (_Py_Identifier): see _PyUnicode_FromId()
8091
struct _Py_unicode_ids ids;
8192
};
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
Make the Unicode dictionary of interned strings compatible with
2+
subinterpreters. Patch by Victor Stinner.

Objects/typeobject.c

-22
Original file line numberDiff line numberDiff line change
@@ -48,11 +48,6 @@ typedef struct PySlot_Offset {
4848
} PySlot_Offset;
4949

5050

51-
/* bpo-40521: Interned strings are shared by all subinterpreters */
52-
#ifndef EXPERIMENTAL_ISOLATED_SUBINTERPRETERS
53-
# define INTERN_NAME_STRINGS
54-
#endif
55-
5651
/* alphabetical order */
5752
_Py_IDENTIFIER(__abstractmethods__);
5853
_Py_IDENTIFIER(__class__);
@@ -3527,7 +3522,6 @@ type_setattro(PyTypeObject *type, PyObject *name, PyObject *value)
35273522
if (name == NULL)
35283523
return -1;
35293524
}
3530-
#ifdef INTERN_NAME_STRINGS
35313525
if (!PyUnicode_CHECK_INTERNED(name)) {
35323526
PyUnicode_InternInPlace(&name);
35333527
if (!PyUnicode_CHECK_INTERNED(name)) {
@@ -3537,7 +3531,6 @@ type_setattro(PyTypeObject *type, PyObject *name, PyObject *value)
35373531
return -1;
35383532
}
35393533
}
3540-
#endif
35413534
}
35423535
else {
35433536
/* Will fail in _PyObject_GenericSetAttrWithDict. */
@@ -7683,17 +7676,10 @@ _PyTypes_InitSlotDefs(void)
76837676
for (slotdef *p = slotdefs; p->name; p++) {
76847677
/* Slots must be ordered by their offset in the PyHeapTypeObject. */
76857678
assert(!p[1].name || p->offset <= p[1].offset);
7686-
#ifdef INTERN_NAME_STRINGS
76877679
p->name_strobj = PyUnicode_InternFromString(p->name);
76887680
if (!p->name_strobj || !PyUnicode_CHECK_INTERNED(p->name_strobj)) {
76897681
return _PyStatus_NO_MEMORY();
76907682
}
7691-
#else
7692-
p->name_strobj = PyUnicode_FromString(p->name);
7693-
if (!p->name_strobj) {
7694-
return _PyStatus_NO_MEMORY();
7695-
}
7696-
#endif
76977683
}
76987684
slotdefs_initialized = 1;
76997685
return _PyStatus_OK();
@@ -7718,24 +7704,16 @@ update_slot(PyTypeObject *type, PyObject *name)
77187704
int offset;
77197705

77207706
assert(PyUnicode_CheckExact(name));
7721-
#ifdef INTERN_NAME_STRINGS
77227707
assert(PyUnicode_CHECK_INTERNED(name));
7723-
#endif
77247708

77257709
assert(slotdefs_initialized);
77267710
pp = ptrs;
77277711
for (p = slotdefs; p->name; p++) {
77287712
assert(PyUnicode_CheckExact(p->name_strobj));
77297713
assert(PyUnicode_CheckExact(name));
7730-
#ifdef INTERN_NAME_STRINGS
77317714
if (p->name_strobj == name) {
77327715
*pp++ = p;
77337716
}
7734-
#else
7735-
if (p->name_strobj == name || _PyUnicode_EQ(p->name_strobj, name)) {
7736-
*pp++ = p;
7737-
}
7738-
#endif
77397717
}
77407718
*pp = NULL;
77417719
for (pp = ptrs; *pp; pp++) {

Objects/unicodeobject.c

+28-61
Original file line numberDiff line numberDiff line change
@@ -206,22 +206,6 @@ extern "C" {
206206
# define OVERALLOCATE_FACTOR 4
207207
#endif
208208

209-
/* bpo-40521: Interned strings are shared by all interpreters. */
210-
#ifndef EXPERIMENTAL_ISOLATED_SUBINTERPRETERS
211-
# define INTERNED_STRINGS
212-
#endif
213-
214-
/* This dictionary holds all interned unicode strings. Note that references
215-
to strings in this dictionary are *not* counted in the string's ob_refcnt.
216-
When the interned string reaches a refcnt of 0 the string deallocation
217-
function will delete the reference from this dictionary.
218-
219-
Another way to look at this is to say that the actual reference
220-
count of a string is: s->ob_refcnt + (s->state ? 2 : 0)
221-
*/
222-
#ifdef INTERNED_STRINGS
223-
static PyObject *interned = NULL;
224-
#endif
225209

226210
static struct _Py_unicode_state*
227211
get_unicode_state(void)
@@ -1946,22 +1930,23 @@ unicode_dealloc(PyObject *unicode)
19461930
break;
19471931

19481932
case SSTATE_INTERNED_MORTAL:
1949-
#ifdef INTERNED_STRINGS
1933+
{
1934+
struct _Py_unicode_state *state = get_unicode_state();
19501935
/* Revive the dead object temporarily. PyDict_DelItem() removes two
19511936
references (key and value) which were ignored by
19521937
PyUnicode_InternInPlace(). Use refcnt=3 rather than refcnt=2
19531938
to prevent calling unicode_dealloc() again. Adjust refcnt after
19541939
PyDict_DelItem(). */
19551940
assert(Py_REFCNT(unicode) == 0);
19561941
Py_SET_REFCNT(unicode, 3);
1957-
if (PyDict_DelItem(interned, unicode) != 0) {
1942+
if (PyDict_DelItem(state->interned, unicode) != 0) {
19581943
_PyErr_WriteUnraisableMsg("deletion of interned string failed",
19591944
NULL);
19601945
}
19611946
assert(Py_REFCNT(unicode) == 1);
19621947
Py_SET_REFCNT(unicode, 0);
1963-
#endif
19641948
break;
1949+
}
19651950

19661951
case SSTATE_INTERNED_IMMORTAL:
19671952
_PyObject_ASSERT_FAILED_MSG(unicode, "Immortal interned string died");
@@ -11536,12 +11521,11 @@ _PyUnicode_EqualToASCIIId(PyObject *left, _Py_Identifier *right)
1153611521
if (PyUnicode_CHECK_INTERNED(left))
1153711522
return 0;
1153811523

11539-
#ifdef INTERNED_STRINGS
1154011524
assert(_PyUnicode_HASH(right_uni) != -1);
1154111525
Py_hash_t hash = _PyUnicode_HASH(left);
11542-
if (hash != -1 && hash != _PyUnicode_HASH(right_uni))
11526+
if (hash != -1 && hash != _PyUnicode_HASH(right_uni)) {
1154311527
return 0;
11544-
#endif
11528+
}
1154511529

1154611530
return unicode_compare_eq(left, right_uni);
1154711531
}
@@ -15765,23 +15749,21 @@ PyUnicode_InternInPlace(PyObject **p)
1576515749
return;
1576615750
}
1576715751

15768-
#ifdef INTERNED_STRINGS
1576915752
if (PyUnicode_READY(s) == -1) {
1577015753
PyErr_Clear();
1577115754
return;
1577215755
}
1577315756

15774-
if (interned == NULL) {
15775-
interned = PyDict_New();
15776-
if (interned == NULL) {
15757+
struct _Py_unicode_state *state = get_unicode_state();
15758+
if (state->interned == NULL) {
15759+
state->interned = PyDict_New();
15760+
if (state->interned == NULL) {
1577715761
PyErr_Clear(); /* Don't leave an exception */
1577815762
return;
1577915763
}
1578015764
}
1578115765

15782-
PyObject *t;
15783-
t = PyDict_SetDefault(interned, s, s);
15784-
15766+
PyObject *t = PyDict_SetDefault(state->interned, s, s);
1578515767
if (t == NULL) {
1578615768
PyErr_Clear();
1578715769
return;
@@ -15798,13 +15780,9 @@ PyUnicode_InternInPlace(PyObject **p)
1579815780
this. */
1579915781
Py_SET_REFCNT(s, Py_REFCNT(s) - 2);
1580015782
_PyUnicode_STATE(s).interned = SSTATE_INTERNED_MORTAL;
15801-
#else
15802-
// PyDict expects that interned strings have their hash
15803-
// (PyASCIIObject.hash) already computed.
15804-
(void)unicode_hash(s);
15805-
#endif
1580615783
}
1580715784

15785+
1580815786
void
1580915787
PyUnicode_InternImmortal(PyObject **p)
1581015788
{
@@ -15838,35 +15816,25 @@ PyUnicode_InternFromString(const char *cp)
1583815816
void
1583915817
_PyUnicode_ClearInterned(PyThreadState *tstate)
1584015818
{
15841-
if (!_Py_IsMainInterpreter(tstate)) {
15842-
// interned dict is shared by all interpreters
15843-
return;
15844-
}
15845-
15846-
if (interned == NULL) {
15847-
return;
15848-
}
15849-
assert(PyDict_CheckExact(interned));
15850-
15851-
PyObject *keys = PyDict_Keys(interned);
15852-
if (keys == NULL) {
15853-
PyErr_Clear();
15819+
struct _Py_unicode_state *state = &tstate->interp->unicode;
15820+
if (state->interned == NULL) {
1585415821
return;
1585515822
}
15856-
assert(PyList_CheckExact(keys));
15823+
assert(PyDict_CheckExact(state->interned));
1585715824

1585815825
/* Interned unicode strings are not forcibly deallocated; rather, we give
1585915826
them their stolen references back, and then clear and DECREF the
1586015827
interned dict. */
1586115828

15862-
Py_ssize_t n = PyList_GET_SIZE(keys);
1586315829
#ifdef INTERNED_STATS
15864-
fprintf(stderr, "releasing %zd interned strings\n", n);
15830+
fprintf(stderr, "releasing %zd interned strings\n",
15831+
PyDict_GET_SIZE(state->interned));
1586515832

1586615833
Py_ssize_t immortal_size = 0, mortal_size = 0;
1586715834
#endif
15868-
for (Py_ssize_t i = 0; i < n; i++) {
15869-
PyObject *s = PyList_GET_ITEM(keys, i);
15835+
Py_ssize_t pos = 0;
15836+
PyObject *s, *ignored_value;
15837+
while (PyDict_Next(state->interned, &pos, &s, &ignored_value)) {
1587015838
assert(PyUnicode_IS_READY(s));
1587115839

1587215840
switch (PyUnicode_CHECK_INTERNED(s)) {
@@ -15896,10 +15864,9 @@ _PyUnicode_ClearInterned(PyThreadState *tstate)
1589615864
"total size of all interned strings: %zd/%zd mortal/immortal\n",
1589715865
mortal_size, immortal_size);
1589815866
#endif
15899-
Py_DECREF(keys);
1590015867

15901-
PyDict_Clear(interned);
15902-
Py_CLEAR(interned);
15868+
PyDict_Clear(state->interned);
15869+
Py_CLEAR(state->interned);
1590315870
}
1590415871

1590515872

@@ -16269,19 +16236,19 @@ _PyUnicode_EnableLegacyWindowsFSEncoding(void)
1626916236
void
1627016237
_PyUnicode_Fini(PyThreadState *tstate)
1627116238
{
16239+
struct _Py_unicode_state *state = &tstate->interp->unicode;
16240+
1627216241
// _PyUnicode_ClearInterned() must be called before _PyUnicode_Fini()
16242+
assert(state->interned == NULL);
1627316243

16274-
struct _Py_unicode_state *state = &tstate->interp->unicode;
16244+
_PyUnicode_FiniEncodings(&state->fs_codec);
1627516245

16276-
Py_CLEAR(state->empty_string);
16246+
unicode_clear_identifiers(tstate);
1627716247

1627816248
for (Py_ssize_t i = 0; i < 256; i++) {
1627916249
Py_CLEAR(state->latin1[i]);
1628016250
}
16281-
16282-
unicode_clear_identifiers(tstate);
16283-
16284-
_PyUnicode_FiniEncodings(&tstate->interp->unicode.fs_codec);
16251+
Py_CLEAR(state->empty_string);
1628516252
}
1628616253

1628716254

Python/pylifecycle.c

+2
Original file line numberDiff line numberDiff line change
@@ -1573,6 +1573,8 @@ finalize_interp_types(PyThreadState *tstate)
15731573
_PyFrame_Fini(tstate);
15741574
_PyAsyncGen_Fini(tstate);
15751575
_PyContext_Fini(tstate);
1576+
// Call _PyUnicode_ClearInterned() before _PyDict_Fini() since it uses
1577+
// a dict internally.
15761578
_PyUnicode_ClearInterned(tstate);
15771579

15781580
_PyDict_Fini(tstate);

0 commit comments

Comments
 (0)