@@ -214,6 +214,22 @@ extern "C" {
214
214
# define OVERALLOCATE_FACTOR 4
215
215
#endif
216
216
217
+ /* bpo-40521: Interned strings are shared by all interpreters. */
218
+ #ifndef EXPERIMENTAL_ISOLATED_SUBINTERPRETERS
219
+ # define INTERNED_STRINGS
220
+ #endif
221
+
222
+ /* This dictionary holds all interned unicode strings. Note that references
223
+ to strings in this dictionary are *not* counted in the string's ob_refcnt.
224
+ When the interned string reaches a refcnt of 0 the string deallocation
225
+ function will delete the reference from this dictionary.
226
+
227
+ Another way to look at this is to say that the actual reference
228
+ count of a string is: s->ob_refcnt + (s->state ? 2 : 0)
229
+ */
230
+ #ifdef INTERNED_STRINGS
231
+ static PyObject * interned = NULL ;
232
+ #endif
217
233
218
234
/* Forward declaration */
219
235
static inline int
@@ -1950,20 +1966,21 @@ unicode_dealloc(PyObject *unicode)
1950
1966
1951
1967
case SSTATE_INTERNED_MORTAL :
1952
1968
{
1953
- struct _Py_unicode_state * state = get_unicode_state ();
1969
+ #ifdef INTERNED_STRINGS
1954
1970
/* Revive the dead object temporarily. PyDict_DelItem() removes two
1955
1971
references (key and value) which were ignored by
1956
1972
PyUnicode_InternInPlace(). Use refcnt=3 rather than refcnt=2
1957
1973
to prevent calling unicode_dealloc() again. Adjust refcnt after
1958
1974
PyDict_DelItem(). */
1959
1975
assert (Py_REFCNT (unicode ) == 0 );
1960
1976
Py_SET_REFCNT (unicode , 3 );
1961
- if (PyDict_DelItem (state -> interned , unicode ) != 0 ) {
1977
+ if (PyDict_DelItem (interned , unicode ) != 0 ) {
1962
1978
_PyErr_WriteUnraisableMsg ("deletion of interned string failed" ,
1963
1979
NULL );
1964
1980
}
1965
1981
assert (Py_REFCNT (unicode ) == 1 );
1966
1982
Py_SET_REFCNT (unicode , 0 );
1983
+ #endif
1967
1984
break ;
1968
1985
}
1969
1986
@@ -11342,11 +11359,13 @@ _PyUnicode_EqualToASCIIId(PyObject *left, _Py_Identifier *right)
11342
11359
if (PyUnicode_CHECK_INTERNED (left ))
11343
11360
return 0 ;
11344
11361
11362
+ #ifdef INTERNED_STRINGS
11345
11363
assert (_PyUnicode_HASH (right_uni ) != -1 );
11346
11364
Py_hash_t hash = _PyUnicode_HASH (left );
11347
11365
if (hash != -1 && hash != _PyUnicode_HASH (right_uni )) {
11348
11366
return 0 ;
11349
11367
}
11368
+ #endif
11350
11369
11351
11370
return unicode_compare_eq (left , right_uni );
11352
11371
}
@@ -15591,21 +15610,21 @@ PyUnicode_InternInPlace(PyObject **p)
15591
15610
return ;
15592
15611
}
15593
15612
15613
+ #ifdef INTERNED_STRINGS
15594
15614
if (PyUnicode_READY (s ) == -1 ) {
15595
15615
PyErr_Clear ();
15596
15616
return ;
15597
15617
}
15598
15618
15599
- struct _Py_unicode_state * state = get_unicode_state ();
15600
- if (state -> interned == NULL ) {
15601
- state -> interned = PyDict_New ();
15602
- if (state -> interned == NULL ) {
15619
+ if (interned == NULL ) {
15620
+ interned = PyDict_New ();
15621
+ if (interned == NULL ) {
15603
15622
PyErr_Clear (); /* Don't leave an exception */
15604
15623
return ;
15605
15624
}
15606
15625
}
15607
15626
15608
- PyObject * t = PyDict_SetDefault (state -> interned , s , s );
15627
+ PyObject * t = PyDict_SetDefault (interned , s , s );
15609
15628
if (t == NULL ) {
15610
15629
PyErr_Clear ();
15611
15630
return ;
@@ -15622,9 +15641,13 @@ PyUnicode_InternInPlace(PyObject **p)
15622
15641
this. */
15623
15642
Py_SET_REFCNT (s , Py_REFCNT (s ) - 2 );
15624
15643
_PyUnicode_STATE (s ).interned = SSTATE_INTERNED_MORTAL ;
15644
+ #else
15645
+ // PyDict expects that interned strings have their hash
15646
+ // (PyASCIIObject.hash) already computed.
15647
+ (void )unicode_hash (s );
15648
+ #endif
15625
15649
}
15626
15650
15627
-
15628
15651
void
15629
15652
PyUnicode_InternImmortal (PyObject * * p )
15630
15653
{
@@ -15658,25 +15681,29 @@ PyUnicode_InternFromString(const char *cp)
15658
15681
void
15659
15682
_PyUnicode_ClearInterned (PyInterpreterState * interp )
15660
15683
{
15661
- struct _Py_unicode_state * state = & interp -> unicode ;
15662
- if ( state -> interned == NULL ) {
15684
+ if (! _Py_IsMainInterpreter ( interp )) {
15685
+ // interned dict is shared by all interpreters
15663
15686
return ;
15664
15687
}
15665
- assert (PyDict_CheckExact (state -> interned ));
15688
+
15689
+ if (interned == NULL ) {
15690
+ return ;
15691
+ }
15692
+ assert (PyDict_CheckExact (interned ));
15666
15693
15667
15694
/* Interned unicode strings are not forcibly deallocated; rather, we give
15668
15695
them their stolen references back, and then clear and DECREF the
15669
15696
interned dict. */
15670
15697
15671
15698
#ifdef INTERNED_STATS
15672
15699
fprintf (stderr , "releasing %zd interned strings\n" ,
15673
- PyDict_GET_SIZE (state -> interned ));
15700
+ PyDict_GET_SIZE (interned ));
15674
15701
15675
15702
Py_ssize_t immortal_size = 0 , mortal_size = 0 ;
15676
15703
#endif
15677
15704
Py_ssize_t pos = 0 ;
15678
15705
PyObject * s , * ignored_value ;
15679
- while (PyDict_Next (state -> interned , & pos , & s , & ignored_value )) {
15706
+ while (PyDict_Next (interned , & pos , & s , & ignored_value )) {
15680
15707
assert (PyUnicode_IS_READY (s ));
15681
15708
15682
15709
switch (PyUnicode_CHECK_INTERNED (s )) {
@@ -15707,8 +15734,8 @@ _PyUnicode_ClearInterned(PyInterpreterState *interp)
15707
15734
mortal_size , immortal_size );
15708
15735
#endif
15709
15736
15710
- PyDict_Clear (state -> interned );
15711
- Py_CLEAR (state -> interned );
15737
+ PyDict_Clear (interned );
15738
+ Py_CLEAR (interned );
15712
15739
}
15713
15740
15714
15741
@@ -16079,8 +16106,7 @@ _PyUnicode_EnableLegacyWindowsFSEncoding(void)
16079
16106
/* Report whether the interned-strings dictionary is currently absent.
 *
 * Returns non-zero when the file-level `interned` pointer is NULL and zero
 * otherwise. The removed lines performed the same NULL test on the
 * `interned` field of the state returned by get_unicode_state(); the added
 * line tests the file-level static instead.
 *
 * NOTE(review): `interned` is declared only under `#ifdef INTERNED_STRINGS`
 * near the top of this file, while this function references it without that
 * guard -- confirm the enclosing conditional (its opening #if is not visible
 * here) keeps this compiling when INTERNED_STRINGS is undefined.
 */
static inline int
16080
16107
unicode_is_finalizing (void )
16081
16108
{
16082
- struct _Py_unicode_state * state = get_unicode_state ();
16083
- return (state -> interned == NULL );
16109
+ return (interned == NULL );
16084
16110
}
16085
16111
#endif
16086
16112
@@ -16090,8 +16116,10 @@ _PyUnicode_Fini(PyInterpreterState *interp)
16090
16116
{
16091
16117
struct _Py_unicode_state * state = & interp -> unicode ;
16092
16118
16093
- // _PyUnicode_ClearInterned() must be called before
16094
- assert (state -> interned == NULL );
16119
+ if (_Py_IsMainInterpreter (interp )) {
16120
+ // _PyUnicode_ClearInterned() must be called before _PyUnicode_Fini()
16121
+ assert (interned == NULL );
16122
+ }
16095
16123
16096
16124
_PyUnicode_FiniEncodings (& state -> fs_codec );
16097
16125
0 commit comments