@@ -231,28 +231,19 @@ get_unicode_state(void)
231231
232232
233233// Return a borrowed reference to the empty string singleton.
234- // Return NULL if the singleton was not created yet.
235234static inline PyObject * unicode_get_empty (void )
236235{
237236 struct _Py_unicode_state * state = get_unicode_state ();
237+ // unicode_get_empty() must not be called before _PyUnicode_Init()
238+ // or after _PyUnicode_Fini()
239+ assert (state -> empty != NULL );
238240 return state -> empty ;
239241}
240242
241243static inline PyObject * unicode_new_empty (void )
242244{
243- struct _Py_unicode_state * state = get_unicode_state ();
244- PyObject * empty = state -> empty ;
245- if (empty != NULL ) {
246- Py_INCREF (empty );
247- }
248- else {
249- empty = PyUnicode_New (0 , 0 );
250- if (empty != NULL ) {
251- Py_INCREF (empty );
252- assert (_PyUnicode_CheckConsistency (empty , 1 ));
253- state -> empty = empty ;
254- }
255- }
245+ PyObject * empty = unicode_get_empty ();
246+ Py_INCREF (empty );
256247 return empty ;
257248}
258249
@@ -696,12 +687,9 @@ unicode_result_ready(PyObject *unicode)
696687 PyObject * empty = unicode_get_empty ();
697688 if (unicode != empty ) {
698689 Py_DECREF (unicode );
699-
700690 Py_INCREF (empty );
701- return empty ;
702691 }
703- // unicode is the empty string singleton
704- return unicode ;
692+ return empty ;
705693 }
706694
707695#ifdef LATIN1_SINGLETONS
@@ -959,7 +947,7 @@ ensure_unicode(PyObject *obj)
959947
960948/* Compilation of templated routines */
961949
962- #define STRINGLIB_GET_EMPTY () unicode_get_empty()
950+ #define STRINGLIB_GET_EMPTY () unicode_get_empty()
963951
964952#include "stringlib/asciilib.h"
965953#include "stringlib/fastsearch.h"
@@ -1260,11 +1248,7 @@ _PyUnicode_New(Py_ssize_t length)
12601248
12611249 /* Optimization for empty strings */
12621250 if (length == 0 ) {
1263- PyObject * empty = unicode_get_empty ();
1264- if (empty != NULL ) {
1265- Py_INCREF (empty );
1266- return (PyUnicodeObject * )empty ;
1267- }
1251+ return (PyUnicodeObject * )unicode_new_empty ();
12681252 }
12691253
12701254 /* Ensure we won't overflow the size. */
@@ -1416,11 +1400,7 @@ PyUnicode_New(Py_ssize_t size, Py_UCS4 maxchar)
14161400{
14171401 /* Optimization for empty strings */
14181402 if (size == 0 ) {
1419- PyObject * empty = unicode_get_empty ();
1420- if (empty != NULL ) {
1421- Py_INCREF (empty );
1422- return empty ;
1423- }
1403+ return unicode_new_empty ();
14241404 }
14251405
14261406 PyObject * obj ;
@@ -2001,8 +1981,7 @@ unicode_dealloc(PyObject *unicode)
20011981static int
20021982unicode_is_singleton (PyObject * unicode )
20031983{
2004- struct _Py_unicode_state * state = get_unicode_state ();
2005- if (unicode == state -> empty ) {
1984+ if (unicode == unicode_get_empty ()) {
20061985 return 1 ;
20071986 }
20081987#ifdef LATIN1_SINGLETONS
@@ -2059,8 +2038,6 @@ unicode_resize(PyObject **p_unicode, Py_ssize_t length)
20592038
20602039 if (length == 0 ) {
20612040 PyObject * empty = unicode_new_empty ();
2062- if (!empty )
2063- return -1 ;
20642041 Py_SETREF (* p_unicode , empty );
20652042 return 0 ;
20662043 }
@@ -10868,10 +10845,7 @@ replace(PyObject *self, PyObject *str1,
1086810845 }
1086910846 new_size = slen + n * (len2 - len1 );
1087010847 if (new_size == 0 ) {
10871- PyObject * empty = unicode_new_empty ();
10872- if (!empty )
10873- goto error ;
10874- u = empty ;
10848+ u = unicode_new_empty ();
1087510849 goto done ;
1087610850 }
1087710851 if (new_size > (PY_SSIZE_T_MAX / rkind )) {
@@ -13293,13 +13267,7 @@ PyUnicode_Partition(PyObject *str_obj, PyObject *sep_obj)
1329313267 len2 = PyUnicode_GET_LENGTH (sep_obj );
1329413268 if (kind1 < kind2 || len1 < len2 ) {
1329513269 PyObject * empty = unicode_get_empty (); // Borrowed reference
13296- if (!empty ) {
13297- out = NULL ;
13298- }
13299- else {
13300- out = PyTuple_Pack (3 , str_obj , empty , empty );
13301- }
13302- return out ;
13270+ return PyTuple_Pack (3 , str_obj , empty , empty );
1330313271 }
1330413272 buf1 = PyUnicode_DATA (str_obj );
1330513273 buf2 = PyUnicode_DATA (sep_obj );
@@ -13351,13 +13319,7 @@ PyUnicode_RPartition(PyObject *str_obj, PyObject *sep_obj)
1335113319 len2 = PyUnicode_GET_LENGTH (sep_obj );
1335213320 if (kind1 < kind2 || len1 < len2 ) {
1335313321 PyObject * empty = unicode_get_empty (); // Borrowed reference
13354- if (!empty ) {
13355- out = NULL ;
13356- }
13357- else {
13358- out = PyTuple_Pack (3 , empty , empty , str_obj );
13359- }
13360- return out ;
13322+ return PyTuple_Pack (3 , empty , empty , str_obj );
1336113323 }
1336213324 buf1 = PyUnicode_DATA (str_obj );
1336313325 buf2 = PyUnicode_DATA (sep_obj );
@@ -15589,12 +15551,20 @@ _PyUnicode_Init(PyThreadState *tstate)
1558915551 0x2029 , /* PARAGRAPH SEPARATOR */
1559015552 };
1559115553
15592- /* Init the implementation */
15593- PyObject * empty = unicode_new_empty ();
15594- if (!empty ) {
15554+ // Use size=1 rather than size=0, so PyUnicode_New(0, maxchar) can be
15555+ // optimized to always use state->empty without having to check if it is
15556+ // NULL or not.
15557+ PyObject * empty = PyUnicode_New (1 , 0 );
15558+ if (empty == NULL ) {
1559515559 return _PyStatus_NO_MEMORY ();
1559615560 }
15597- Py_DECREF (empty );
15561+ PyUnicode_1BYTE_DATA (empty )[0 ] = 0 ;
15562+ _PyUnicode_LENGTH (empty ) = 0 ;
15563+ assert (_PyUnicode_CheckConsistency (empty , 1 ));
15564+
15565+ struct _Py_unicode_state * state = & tstate -> interp -> unicode ;
15566+ assert (state -> empty == NULL );
15567+ state -> empty = empty ;
1559815568
1559915569 if (_Py_IsMainInterpreter (tstate )) {
1560015570 /* initialize the linebreak bloom filter */
0 commit comments