@@ -211,6 +211,22 @@ extern "C" {
211
211
# define OVERALLOCATE_FACTOR 4
212
212
#endif
213
213
214
+ /* bpo-40521: Interned strings are shared by all interpreters. */
215
+ #ifndef EXPERIMENTAL_ISOLATED_SUBINTERPRETERS
216
+ # define INTERNED_STRINGS
217
+ #endif
218
+
219
+ /* This dictionary holds all interned unicode strings. Note that references
220
+ to strings in this dictionary are *not* counted in the string's ob_refcnt.
221
+ When the interned string reaches a refcnt of 0 the string deallocation
222
+ function will delete the reference from this dictionary.
223
+
224
+ Another way to look at this is to say that the actual reference
225
+ count of a string is: s->ob_refcnt + (s->state ? 2 : 0)
226
+ */
227
+ #ifdef INTERNED_STRINGS
228
+ static PyObject * interned = NULL ;
229
+ #endif
214
230
215
231
static struct _Py_unicode_state *
216
232
get_unicode_state (void )
@@ -1936,20 +1952,21 @@ unicode_dealloc(PyObject *unicode)
1936
1952
1937
1953
case SSTATE_INTERNED_MORTAL :
1938
1954
{
1939
- struct _Py_unicode_state * state = get_unicode_state ();
1955
+ #ifdef INTERNED_STRINGS
1940
1956
/* Revive the dead object temporarily. PyDict_DelItem() removes two
1941
1957
references (key and value) which were ignored by
1942
1958
PyUnicode_InternInPlace(). Use refcnt=3 rather than refcnt=2
1943
1959
to prevent calling unicode_dealloc() again. Adjust refcnt after
1944
1960
PyDict_DelItem(). */
1945
1961
assert (Py_REFCNT (unicode ) == 0 );
1946
1962
Py_SET_REFCNT (unicode , 3 );
1947
- if (PyDict_DelItem (state -> interned , unicode ) != 0 ) {
1963
+ if (PyDict_DelItem (interned , unicode ) != 0 ) {
1948
1964
_PyErr_WriteUnraisableMsg ("deletion of interned string failed" ,
1949
1965
NULL );
1950
1966
}
1951
1967
assert (Py_REFCNT (unicode ) == 1 );
1952
1968
Py_SET_REFCNT (unicode , 0 );
1969
+ #endif
1953
1970
break ;
1954
1971
}
1955
1972
@@ -11600,11 +11617,13 @@ _PyUnicode_EqualToASCIIId(PyObject *left, _Py_Identifier *right)
11600
11617
if (PyUnicode_CHECK_INTERNED (left ))
11601
11618
return 0 ;
11602
11619
11620
+ #ifdef INTERNED_STRINGS
11603
11621
assert (_PyUnicode_HASH (right_uni ) != -1 );
11604
11622
Py_hash_t hash = _PyUnicode_HASH (left );
11605
11623
if (hash != -1 && hash != _PyUnicode_HASH (right_uni )) {
11606
11624
return 0 ;
11607
11625
}
11626
+ #endif
11608
11627
11609
11628
return unicode_compare_eq (left , right_uni );
11610
11629
}
@@ -15833,21 +15852,21 @@ PyUnicode_InternInPlace(PyObject **p)
15833
15852
return ;
15834
15853
}
15835
15854
15855
+ #ifdef INTERNED_STRINGS
15836
15856
if (PyUnicode_READY (s ) == -1 ) {
15837
15857
PyErr_Clear ();
15838
15858
return ;
15839
15859
}
15840
15860
15841
- struct _Py_unicode_state * state = get_unicode_state ();
15842
- if (state -> interned == NULL ) {
15843
- state -> interned = PyDict_New ();
15844
- if (state -> interned == NULL ) {
15861
+ if (interned == NULL ) {
15862
+ interned = PyDict_New ();
15863
+ if (interned == NULL ) {
15845
15864
PyErr_Clear (); /* Don't leave an exception */
15846
15865
return ;
15847
15866
}
15848
15867
}
15849
15868
15850
- PyObject * t = PyDict_SetDefault (state -> interned , s , s );
15869
+ PyObject * t = PyDict_SetDefault (interned , s , s );
15851
15870
if (t == NULL ) {
15852
15871
PyErr_Clear ();
15853
15872
return ;
@@ -15864,9 +15883,13 @@ PyUnicode_InternInPlace(PyObject **p)
15864
15883
this. */
15865
15884
Py_SET_REFCNT (s , Py_REFCNT (s ) - 2 );
15866
15885
_PyUnicode_STATE (s ).interned = SSTATE_INTERNED_MORTAL ;
15886
+ #else
15887
+ // PyDict expects that interned strings have their hash
15888
+ // (PyASCIIObject.hash) already computed.
15889
+ (void )unicode_hash (s );
15890
+ #endif
15867
15891
}
15868
15892
15869
-
15870
15893
void
15871
15894
PyUnicode_InternImmortal (PyObject * * p )
15872
15895
{
@@ -15900,25 +15923,29 @@ PyUnicode_InternFromString(const char *cp)
15900
15923
void
15901
15924
_PyUnicode_ClearInterned (PyInterpreterState * interp )
15902
15925
{
15903
- struct _Py_unicode_state * state = & interp -> unicode ;
15904
- if (state -> interned == NULL ) {
15926
+ if (!_Py_IsMainInterpreter (interp )) {
15927
+ // interned dict is shared by all interpreters
15928
+ return ;
15929
+ }
15930
+
15931
+ if (interned == NULL ) {
15905
15932
return ;
15906
15933
}
15907
- assert (PyDict_CheckExact (state -> interned ));
15934
+ assert (PyDict_CheckExact (interned ));
15908
15935
15909
15936
/* Interned unicode strings are not forcibly deallocated; rather, we give
15910
15937
them their stolen references back, and then clear and DECREF the
15911
15938
interned dict. */
15912
15939
15913
15940
#ifdef INTERNED_STATS
15914
15941
fprintf (stderr , "releasing %zd interned strings\n" ,
15915
- PyDict_GET_SIZE (state -> interned ));
15942
+ PyDict_GET_SIZE (interned ));
15916
15943
15917
15944
Py_ssize_t immortal_size = 0 , mortal_size = 0 ;
15918
15945
#endif
15919
15946
Py_ssize_t pos = 0 ;
15920
15947
PyObject * s , * ignored_value ;
15921
- while (PyDict_Next (state -> interned , & pos , & s , & ignored_value )) {
15948
+ while (PyDict_Next (interned , & pos , & s , & ignored_value )) {
15922
15949
assert (PyUnicode_IS_READY (s ));
15923
15950
15924
15951
switch (PyUnicode_CHECK_INTERNED (s )) {
@@ -15949,8 +15976,8 @@ _PyUnicode_ClearInterned(PyInterpreterState *interp)
15949
15976
mortal_size , immortal_size );
15950
15977
#endif
15951
15978
15952
- PyDict_Clear (state -> interned );
15953
- Py_CLEAR (state -> interned );
15979
+ PyDict_Clear (interned );
15980
+ Py_CLEAR (interned );
15954
15981
}
15955
15982
15956
15983
@@ -16322,8 +16349,10 @@ _PyUnicode_Fini(PyInterpreterState *interp)
16322
16349
{
16323
16350
struct _Py_unicode_state * state = & interp -> unicode ;
16324
16351
16325
- // _PyUnicode_ClearInterned() must be called before
16326
- assert (state -> interned == NULL );
16352
+ if (_Py_IsMainInterpreter (interp )) {
16353
+ // _PyUnicode_ClearInterned() must be called before _PyUnicode_Fini()
16354
+ assert (interned == NULL );
16355
+ }
16327
16356
16328
16357
_PyUnicode_FiniEncodings (& state -> fs_codec );
16329
16358
0 commit comments