@@ -1305,6 +1305,45 @@ _PyUnicode_Dump(PyObject *op)
13051305}
13061306#endif
13071307
1308+ // Simplified version of PyUnicode_New() that only creates ASCII strings.
1309+ // This function does not test if size == 0.
1310+ static PyObject *
1311+ ascii_new (Py_ssize_t size )
1312+ {
1313+ PyObject * obj ;
1314+ void * data ;
1315+ Py_ssize_t struct_size = sizeof (PyASCIIObject );
1316+
1317+ if (size > ((PY_SSIZE_T_MAX - struct_size ) - 1 ))
1318+ return PyErr_NoMemory ();
1319+
1320+ /* Duplicated allocation code from _PyObject_New() instead of a call to
1321+ * PyObject_New() so we are able to allocate space for the object and
1322+ * it's data buffer.
1323+ */
1324+ obj = (PyObject * ) PyObject_Malloc (struct_size + (size + 1 ));
1325+ if (obj == NULL ) {
1326+ return PyErr_NoMemory ();
1327+ }
1328+ _PyObject_Init (obj , & PyUnicode_Type );
1329+
1330+ data = ((PyASCIIObject * )obj ) + 1 ;
1331+
1332+ _PyUnicode_LENGTH (obj ) = size ;
1333+ _PyUnicode_HASH (obj ) = -1 ;
1334+ _PyUnicode_STATE (obj ).interned = 0 ;
1335+ _PyUnicode_STATE (obj ).kind = PyUnicode_1BYTE_KIND ;
1336+ _PyUnicode_STATE (obj ).compact = 1 ;
1337+ _PyUnicode_STATE (obj ).ascii = 1 ;
1338+ _PyUnicode_STATE (obj ).statically_allocated = 0 ;
1339+ ((char * )data )[size ] = 0 ;
1340+
1341+ #ifdef Py_DEBUG
1342+ unicode_fill_invalid ((PyObject * )unicode , 0 );
1343+ #endif
1344+ assert (_PyUnicode_CheckConsistency (obj , 0 ));
1345+ return obj ;
1346+ }
13081347
13091348PyObject *
13101349PyUnicode_New (Py_ssize_t size , Py_UCS4 maxchar )
@@ -2208,13 +2247,16 @@ _PyUnicode_FromASCII(const char *buffer, Py_ssize_t size)
22082247{
22092248 const unsigned char * s = (const unsigned char * )buffer ;
22102249 PyObject * unicode ;
2250+ if (size == 0 ) {
2251+ return unicode_get_empty ();
2252+ }
22112253 if (size == 1 ) {
22122254#ifdef Py_DEBUG
22132255 assert ((unsigned char )s [0 ] < 128 );
22142256#endif
22152257 return get_latin1_char (s [0 ]);
22162258 }
2217- unicode = PyUnicode_New (size , 127 );
2259+ unicode = ascii_new (size );
22182260 if (!unicode )
22192261 return NULL ;
22202262 memcpy (PyUnicode_1BYTE_DATA (unicode ), s , size );
@@ -5297,11 +5339,13 @@ unicode_decode_utf8(const char *s, Py_ssize_t size,
52975339
52985340 Py_ssize_t pos = find_first_nonascii (starts , end );
52995341 if (pos == size ) { // fast path: ASCII string.
5300- PyObject * u = PyUnicode_New (size , 127 );
5342+ PyObject * u = ascii_new (size );
53015343 if (u == NULL ) {
53025344 return NULL ;
53035345 }
5304- memcpy (PyUnicode_1BYTE_DATA (u ), s , size );
5346+ // memcpy(PyUnicode_1BYTE_DATA(u), s, size);
5347+ // bypass iscompact & isascii checks.
5348+ memcpy (_Py_STATIC_CAST (void * , (_PyASCIIObject_CAST (u ) + 1 )), s , size );
53055349 if (consumed ) {
53065350 * consumed = size ;
53075351 }
@@ -5338,7 +5382,7 @@ unicode_decode_utf8(const char *s, Py_ssize_t size,
53385382 _PyUnicodeWriter writer ;
53395383 _PyUnicodeWriter_InitWithBuffer (& writer , u );
53405384 if (maxchr <= 255 ) {
5341- memcpy (PyUnicode_1BYTE_DATA (u ), s , pos );
5385+ memcpy (_PyUnicode_COMPACT_DATA (u ), s , pos );
53425386 s += pos ;
53435387 size -= pos ;
53445388 writer .pos = pos ;
@@ -7419,7 +7463,7 @@ PyUnicode_DecodeASCII(const char *s,
74197463 }
74207464
74217465 // Shortcut for simple case
7422- PyObject * u = PyUnicode_New (size , 127 );
7466+ PyObject * u = ascii_new (size );
74237467 if (u == NULL ) {
74247468 return NULL ;
74257469 }
0 commit comments