@@ -303,17 +303,6 @@ unicode_decode_utf8(const char *s, Py_ssize_t size,
303303/* List of static strings. */
304304static _Py_Identifier * static_strings = NULL ;
305305
306- /* bpo-40521: Latin1 singletons are shared by all interpreters. */
307- #ifndef EXPERIMENTAL_ISOLATED_SUBINTERPRETERS
308- # define LATIN1_SINGLETONS
309- #endif
310-
311- #ifdef LATIN1_SINGLETONS
312- /* Single character Unicode strings in the Latin-1 range are being
313- shared as well. */
314- static PyObject * unicode_latin1 [256 ] = {NULL };
315- #endif
316-
317306/* Fast detection of the most frequent whitespace characters */
318307const unsigned char _Py_ascii_whitespace [] = {
319308 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
@@ -657,9 +646,8 @@ unicode_result_wchar(PyObject *unicode)
657646 if (len == 1 ) {
658647 wchar_t ch = _PyUnicode_WSTR (unicode )[0 ];
659648 if ((Py_UCS4 )ch < 256 ) {
660- PyObject * latin1_char = get_latin1_char ((unsigned char )ch );
661649 Py_DECREF (unicode );
662- return latin1_char ;
650+ return get_latin1_char (( unsigned char ) ch ) ;
663651 }
664652 }
665653
@@ -692,13 +680,13 @@ unicode_result_ready(PyObject *unicode)
692680 return empty ;
693681 }
694682
695- #ifdef LATIN1_SINGLETONS
696683 if (length == 1 ) {
697- const void * data = PyUnicode_DATA (unicode );
698684 int kind = PyUnicode_KIND (unicode );
699- Py_UCS4 ch = PyUnicode_READ (kind , data , 0 );
700- if (ch < 256 ) {
701- PyObject * latin1_char = unicode_latin1 [ch ];
685+ if (kind == PyUnicode_1BYTE_KIND ) {
686+ Py_UCS1 * data = PyUnicode_1BYTE_DATA (unicode );
687+ Py_UCS1 ch = data [0 ];
688+ struct _Py_unicode_state * state = get_unicode_state ();
689+ PyObject * latin1_char = state -> latin1 [ch ];
702690 if (latin1_char != NULL ) {
703691 if (unicode != latin1_char ) {
704692 Py_INCREF (latin1_char );
@@ -709,12 +697,14 @@ unicode_result_ready(PyObject *unicode)
709697 else {
710698 assert (_PyUnicode_CheckConsistency (unicode , 1 ));
711699 Py_INCREF (unicode );
712- unicode_latin1 [ch ] = unicode ;
700+ state -> latin1 [ch ] = unicode ;
713701 return unicode ;
714702 }
715703 }
704+ else {
705+ assert (PyUnicode_READ_CHAR (unicode , 0 ) >= 256 );
706+ }
716707 }
717- #endif
718708
719709 assert (_PyUnicode_CheckConsistency (unicode , 1 ));
720710 return unicode ;
@@ -1981,18 +1971,18 @@ unicode_dealloc(PyObject *unicode)
/* Report whether `unicode` is one of the cached singleton strings.
 *
 * As patched (the `+` lines), the caches are read from the structure
 * returned by get_unicode_state(): its `empty` member and its `latin1[]`
 * table. The `-` lines show the previous form, which used
 * unicode_get_empty() and the file-level `unicode_latin1[]` array, the
 * latter only when LATIN1_SINGLETONS was defined.
 *
 * Returns 1 if `unicode` is the cached empty string, or if it is a
 * one-character string whose code point is below 256 and it is the very
 * object stored in the Latin-1 table for that code point; returns 0
 * otherwise.
 */
19811971static int
19821972unicode_is_singleton (PyObject * unicode )
19831973{
1984- if (unicode == unicode_get_empty ()) {
1974+ struct _Py_unicode_state * state = get_unicode_state ();
1975+ if (unicode == state -> empty ) {
19851976 return 1 ;
19861977 }
1987- #ifdef LATIN1_SINGLETONS
19881978 PyASCIIObject * ascii = (PyASCIIObject * )unicode ;
/* Only strings of length 1 whose kind is not PyUnicode_WCHAR_KIND are
   looked up in the Latin-1 table. */
19891979 if (ascii -> state .kind != PyUnicode_WCHAR_KIND && ascii -> length == 1 )
19901980 {
19911981 Py_UCS4 ch = PyUnicode_READ_CHAR (unicode , 0 );
/* Pointer identity, not content equality: an equal one-character string
   that is a different object does not count as the singleton. */
1992- if (ch < 256 && unicode_latin1 [ch ] == unicode )
1982+ if (ch < 256 && state -> latin1 [ch ] == unicode ) {
19931983 return 1 ;
1984+ }
19941985 }
1995- #endif
19961986 return 0 ;
19971987}
19981988#endif
@@ -2130,17 +2120,15 @@ unicode_write_cstr(PyObject *unicode, Py_ssize_t index,
21302120}
21312121
21322122static PyObject *
2133- get_latin1_char (unsigned char ch )
2123+ get_latin1_char (Py_UCS1 ch )
21342124{
2135- PyObject * unicode ;
2125+ struct _Py_unicode_state * state = get_unicode_state () ;
21362126
2137- #ifdef LATIN1_SINGLETONS
2138- unicode = unicode_latin1 [ch ];
2127+ PyObject * unicode = state -> latin1 [ch ];
21392128 if (unicode ) {
21402129 Py_INCREF (unicode );
21412130 return unicode ;
21422131 }
2143- #endif
21442132
21452133 unicode = PyUnicode_New (1 , ch );
21462134 if (!unicode ) {
@@ -2150,10 +2138,8 @@ get_latin1_char(unsigned char ch)
21502138 PyUnicode_1BYTE_DATA (unicode )[0 ] = ch ;
21512139 assert (_PyUnicode_CheckConsistency (unicode , 1 ));
21522140
2153- #ifdef LATIN1_SINGLETONS
21542141 Py_INCREF (unicode );
2155- unicode_latin1 [ch ] = unicode ;
2156- #endif
2142+ state -> latin1 [ch ] = unicode ;
21572143 return unicode ;
21582144}
21592145
@@ -2164,8 +2150,9 @@ unicode_char(Py_UCS4 ch)
21642150
21652151 assert (ch <= MAX_UNICODE );
21662152
2167- if (ch < 256 )
2153+ if (ch < 256 ) {
21682154 return get_latin1_char (ch );
2155+ }
21692156
21702157 unicode = PyUnicode_New (1 , ch );
21712158 if (unicode == NULL )
@@ -2367,11 +2354,13 @@ _PyUnicode_FromUCS1(const Py_UCS1* u, Py_ssize_t size)
23672354 PyObject * res ;
23682355 unsigned char max_char ;
23692356
2370- if (size == 0 )
2357+ if (size == 0 ) {
23712358 _Py_RETURN_UNICODE_EMPTY ();
2359+ }
23722360 assert (size > 0 );
2373- if (size == 1 )
2361+ if (size == 1 ) {
23742362 return get_latin1_char (u [0 ]);
2363+ }
23752364
23762365 max_char = ucs1lib_find_max_char (u , u + size );
23772366 res = PyUnicode_New (size , max_char );
@@ -5008,8 +4997,9 @@ unicode_decode_utf8(const char *s, Py_ssize_t size,
50084997
50094998 /* ASCII is equivalent to the first 128 ordinals in Unicode. */
50104999 if (size == 1 && (unsigned char )s [0 ] < 128 ) {
5011- if (consumed )
5000+ if (consumed ) {
50125001 * consumed = 1 ;
5002+ }
50135003 return get_latin1_char ((unsigned char )s [0 ]);
50145004 }
50155005
@@ -7176,8 +7166,9 @@ PyUnicode_DecodeASCII(const char *s,
71767166 _Py_RETURN_UNICODE_EMPTY ();
71777167
71787168 /* ASCII is equivalent to the first 128 ordinals in Unicode. */
7179- if (size == 1 && (unsigned char )s [0 ] < 128 )
7169+ if (size == 1 && (unsigned char )s [0 ] < 128 ) {
71807170 return get_latin1_char ((unsigned char )s [0 ]);
7171+ }
71817172
71827173 // Shortcut for simple case
71837174 PyObject * u = PyUnicode_New (size , 127 );
@@ -16234,12 +16225,11 @@ _PyUnicode_Fini(PyThreadState *tstate)
1623416225
1623516226 Py_CLEAR (state -> empty );
1623616227
16228+ for (Py_ssize_t i = 0 ; i < 256 ; i ++ ) {
16229+ Py_CLEAR (state -> latin1 [i ]);
16230+ }
16231+
1623716232 if (is_main_interp ) {
16238- #ifdef LATIN1_SINGLETONS
16239- for (Py_ssize_t i = 0 ; i < 256 ; i ++ ) {
16240- Py_CLEAR (unicode_latin1 [i ]);
16241- }
16242- #endif
1624316233 unicode_clear_static_strings ();
1624416234 }
1624516235
0 commit comments