@@ -303,17 +303,6 @@ unicode_decode_utf8(const char *s, Py_ssize_t size,
303
303
/* List of static strings. */
304
304
static _Py_Identifier * static_strings = NULL ;
305
305
306
- /* bpo-40521: Latin1 singletons are shared by all interpreters. */
307
- #ifndef EXPERIMENTAL_ISOLATED_SUBINTERPRETERS
308
- # define LATIN1_SINGLETONS
309
- #endif
310
-
311
- #ifdef LATIN1_SINGLETONS
312
- /* Single character Unicode strings in the Latin-1 range are being
313
- shared as well. */
314
- static PyObject * unicode_latin1 [256 ] = {NULL };
315
- #endif
316
-
317
306
/* Fast detection of the most frequent whitespace characters */
318
307
const unsigned char _Py_ascii_whitespace [] = {
319
308
0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
@@ -657,9 +646,8 @@ unicode_result_wchar(PyObject *unicode)
657
646
if (len == 1 ) {
658
647
wchar_t ch = _PyUnicode_WSTR (unicode )[0 ];
659
648
if ((Py_UCS4 )ch < 256 ) {
660
- PyObject * latin1_char = get_latin1_char ((unsigned char )ch );
661
649
Py_DECREF (unicode );
662
- return latin1_char ;
650
+ return get_latin1_char (( unsigned char ) ch ) ;
663
651
}
664
652
}
665
653
@@ -692,13 +680,13 @@ unicode_result_ready(PyObject *unicode)
692
680
return empty ;
693
681
}
694
682
695
- #ifdef LATIN1_SINGLETONS
696
683
if (length == 1 ) {
697
- const void * data = PyUnicode_DATA (unicode );
698
684
int kind = PyUnicode_KIND (unicode );
699
- Py_UCS4 ch = PyUnicode_READ (kind , data , 0 );
700
- if (ch < 256 ) {
701
- PyObject * latin1_char = unicode_latin1 [ch ];
685
+ if (kind == PyUnicode_1BYTE_KIND ) {
686
+ Py_UCS1 * data = PyUnicode_1BYTE_DATA (unicode );
687
+ Py_UCS1 ch = data [0 ];
688
+ struct _Py_unicode_state * state = get_unicode_state ();
689
+ PyObject * latin1_char = state -> latin1 [ch ];
702
690
if (latin1_char != NULL ) {
703
691
if (unicode != latin1_char ) {
704
692
Py_INCREF (latin1_char );
@@ -709,12 +697,14 @@ unicode_result_ready(PyObject *unicode)
709
697
else {
710
698
assert (_PyUnicode_CheckConsistency (unicode , 1 ));
711
699
Py_INCREF (unicode );
712
- unicode_latin1 [ch ] = unicode ;
700
+ state -> latin1 [ch ] = unicode ;
713
701
return unicode ;
714
702
}
715
703
}
704
+ else {
705
+ assert (PyUnicode_READ_CHAR (unicode , 0 ) >= 256 );
706
+ }
716
707
}
717
- #endif
718
708
719
709
assert (_PyUnicode_CheckConsistency (unicode , 1 ));
720
710
return unicode ;
@@ -1981,18 +1971,18 @@ unicode_dealloc(PyObject *unicode)
1981
1971
static int
1982
1972
unicode_is_singleton (PyObject * unicode )
1983
1973
{
1984
- if (unicode == unicode_get_empty ()) {
1974
+ struct _Py_unicode_state * state = get_unicode_state ();
1975
+ if (unicode == state -> empty ) {
1985
1976
return 1 ;
1986
1977
}
1987
- #ifdef LATIN1_SINGLETONS
1988
1978
PyASCIIObject * ascii = (PyASCIIObject * )unicode ;
1989
1979
if (ascii -> state .kind != PyUnicode_WCHAR_KIND && ascii -> length == 1 )
1990
1980
{
1991
1981
Py_UCS4 ch = PyUnicode_READ_CHAR (unicode , 0 );
1992
- if (ch < 256 && unicode_latin1 [ch ] == unicode )
1982
+ if (ch < 256 && state -> latin1 [ch ] == unicode ) {
1993
1983
return 1 ;
1984
+ }
1994
1985
}
1995
- #endif
1996
1986
return 0 ;
1997
1987
}
1998
1988
#endif
@@ -2130,17 +2120,15 @@ unicode_write_cstr(PyObject *unicode, Py_ssize_t index,
2130
2120
}
2131
2121
2132
2122
static PyObject *
2133
- get_latin1_char (unsigned char ch )
2123
+ get_latin1_char (Py_UCS1 ch )
2134
2124
{
2135
- PyObject * unicode ;
2125
+ struct _Py_unicode_state * state = get_unicode_state () ;
2136
2126
2137
- #ifdef LATIN1_SINGLETONS
2138
- unicode = unicode_latin1 [ch ];
2127
+ PyObject * unicode = state -> latin1 [ch ];
2139
2128
if (unicode ) {
2140
2129
Py_INCREF (unicode );
2141
2130
return unicode ;
2142
2131
}
2143
- #endif
2144
2132
2145
2133
unicode = PyUnicode_New (1 , ch );
2146
2134
if (!unicode ) {
@@ -2150,10 +2138,8 @@ get_latin1_char(unsigned char ch)
2150
2138
PyUnicode_1BYTE_DATA (unicode )[0 ] = ch ;
2151
2139
assert (_PyUnicode_CheckConsistency (unicode , 1 ));
2152
2140
2153
- #ifdef LATIN1_SINGLETONS
2154
2141
Py_INCREF (unicode );
2155
- unicode_latin1 [ch ] = unicode ;
2156
- #endif
2142
+ state -> latin1 [ch ] = unicode ;
2157
2143
return unicode ;
2158
2144
}
2159
2145
@@ -2164,8 +2150,9 @@ unicode_char(Py_UCS4 ch)
2164
2150
2165
2151
assert (ch <= MAX_UNICODE );
2166
2152
2167
- if (ch < 256 )
2153
+ if (ch < 256 ) {
2168
2154
return get_latin1_char (ch );
2155
+ }
2169
2156
2170
2157
unicode = PyUnicode_New (1 , ch );
2171
2158
if (unicode == NULL )
@@ -2367,11 +2354,13 @@ _PyUnicode_FromUCS1(const Py_UCS1* u, Py_ssize_t size)
2367
2354
PyObject * res ;
2368
2355
unsigned char max_char ;
2369
2356
2370
- if (size == 0 )
2357
+ if (size == 0 ) {
2371
2358
_Py_RETURN_UNICODE_EMPTY ();
2359
+ }
2372
2360
assert (size > 0 );
2373
- if (size == 1 )
2361
+ if (size == 1 ) {
2374
2362
return get_latin1_char (u [0 ]);
2363
+ }
2375
2364
2376
2365
max_char = ucs1lib_find_max_char (u , u + size );
2377
2366
res = PyUnicode_New (size , max_char );
@@ -5008,8 +4997,9 @@ unicode_decode_utf8(const char *s, Py_ssize_t size,
5008
4997
5009
4998
/* ASCII is equivalent to the first 128 ordinals in Unicode. */
5010
4999
if (size == 1 && (unsigned char )s [0 ] < 128 ) {
5011
- if (consumed )
5000
+ if (consumed ) {
5012
5001
* consumed = 1 ;
5002
+ }
5013
5003
return get_latin1_char ((unsigned char )s [0 ]);
5014
5004
}
5015
5005
@@ -7176,8 +7166,9 @@ PyUnicode_DecodeASCII(const char *s,
7176
7166
_Py_RETURN_UNICODE_EMPTY ();
7177
7167
7178
7168
/* ASCII is equivalent to the first 128 ordinals in Unicode. */
7179
- if (size == 1 && (unsigned char )s [0 ] < 128 )
7169
+ if (size == 1 && (unsigned char )s [0 ] < 128 ) {
7180
7170
return get_latin1_char ((unsigned char )s [0 ]);
7171
+ }
7181
7172
7182
7173
// Shortcut for simple case
7183
7174
PyObject * u = PyUnicode_New (size , 127 );
@@ -16234,12 +16225,11 @@ _PyUnicode_Fini(PyThreadState *tstate)
16234
16225
16235
16226
Py_CLEAR (state -> empty );
16236
16227
16228
+ for (Py_ssize_t i = 0 ; i < 256 ; i ++ ) {
16229
+ Py_CLEAR (state -> latin1 [i ]);
16230
+ }
16231
+
16237
16232
if (is_main_interp ) {
16238
- #ifdef LATIN1_SINGLETONS
16239
- for (Py_ssize_t i = 0 ; i < 256 ; i ++ ) {
16240
- Py_CLEAR (unicode_latin1 [i ]);
16241
- }
16242
- #endif
16243
16233
unicode_clear_static_strings ();
16244
16234
}
16245
16235
0 commit comments