6363
6464/* ASCII-only strings created through PyUnicode_New use the PyASCIIObject
6565 structure. state.ascii and state.compact are set, and the data
66- immediately follow the structure. utf8_length and wstr_length can be found
66+ immediately follow the structure. utf8_length can be found
6767 in the length field; the utf8 pointer is equal to the data pointer. */
6868typedef struct {
6969 /* There are 3 forms of Unicode strings:
@@ -76,7 +76,7 @@ typedef struct {
7676 * compact = 1
7777 * ascii = 1
7878 * ready = 1
79- * (length is the length of the utf8 and wstr strings )
79+ * (length is the length of the utf8 string)
8080 * (data starts just after the structure)
8181 * (since ASCII is decoded from UTF-8, the utf8 string is the data)
8282
@@ -91,51 +91,25 @@ typedef struct {
9191 * ascii = 0
9292 * utf8 is not shared with data
9393 * utf8_length = 0 if utf8 is NULL
94- * wstr is shared with data and wstr_length=length
95- if kind=PyUnicode_2BYTE_KIND and sizeof(wchar_t)=2
96- or if kind=PyUnicode_4BYTE_KIND and sizeof(wchar_t)=4
97- * wstr_length = 0 if wstr is NULL
9894 * (data starts just after the structure)
9995
100- - legacy string, not ready:
101-
102- * structure = PyUnicodeObject
103- * test: kind == PyUnicode_WCHAR_KIND
104- * length = 0 (use wstr_length)
105- * hash = -1
106- * kind = PyUnicode_WCHAR_KIND
107- * compact = 0
108- * ascii = 0
109- * ready = 0
110- * interned = SSTATE_NOT_INTERNED
111- * wstr is not NULL
112- * data.any is NULL
113- * utf8 is NULL
114- * utf8_length = 0
115-
11696 - legacy string, ready:
11797
11898 * structure = PyUnicodeObject structure
119- * test: !PyUnicode_IS_COMPACT(op) && kind != PyUnicode_WCHAR_KIND
99+ * test: !PyUnicode_IS_COMPACT(op)
120100 * kind = PyUnicode_1BYTE_KIND, PyUnicode_2BYTE_KIND or
121101 PyUnicode_4BYTE_KIND
122102 * compact = 0
123103 * ready = 1
124104 * data.any is not NULL
125105 * utf8 is shared with data.any and utf8_length = length if ascii = 1
126106 * utf8_length = 0 if utf8 is NULL
127- * wstr is shared with data.any and wstr_length = length
128- if kind=PyUnicode_2BYTE_KIND and sizeof(wchar_t)=2
129- or if kind=PyUnicode_4BYTE_KIND and sizeof(wchar_4)=4
130- * wstr_length = 0 if wstr is NULL
131107
132108 Compact strings use only one memory block (structure + characters),
133109 whereas legacy strings use one block for the structure and one block
134110 for characters.
135111
136- Legacy strings are created by PyUnicode_FromUnicode() and
137- PyUnicode_FromStringAndSize(NULL, size) functions. They become ready
138- when PyUnicode_READY() is called.
112+ Legacy strings are created by subclasses of Unicode.
139113
140114 See also _PyUnicode_CheckConsistency().
141115 */
@@ -154,11 +128,6 @@ typedef struct {
154128 unsigned int interned :2 ;
155129 /* Character size:
156130
157- - PyUnicode_WCHAR_KIND (0):
158-
159- * character type = wchar_t (16 or 32 bits, depending on the
160- platform)
161-
162131 - PyUnicode_1BYTE_KIND (1):
163132
164133 * character type = Py_UCS1 (8 bits, unsigned)
@@ -198,7 +167,6 @@ typedef struct {
198167 4 bytes (see issue #19537 on m68k). */
199168 unsigned int :24 ;
200169 } state ;
201- wchar_t * wstr ; /* wchar_t representation (null-terminated) */
202170} PyASCIIObject ;
203171
204172/* Non-ASCII strings allocated through PyUnicode_New use the
@@ -209,13 +177,9 @@ typedef struct {
209177 Py_ssize_t utf8_length ; /* Number of bytes in utf8, excluding the
210178 * terminating \0. */
211179 char * utf8 ; /* UTF-8 representation (null-terminated) */
212- Py_ssize_t wstr_length ; /* Number of code points in wstr, possible
213- * surrogates count as two code points. */
214180} PyCompactUnicodeObject ;
215181
216- /* Strings allocated through PyUnicode_FromUnicode(NULL, len) use the
217- PyUnicodeObject structure. The actual string data is initially in the wstr
218- block, and copied into the data block using _PyUnicode_Ready. */
182+ /* Object format for Unicode subclasses. */
219183typedef struct {
220184 PyCompactUnicodeObject _base ;
221185 union {
@@ -298,10 +262,6 @@ static inline int PyUnicode_IS_COMPACT_ASCII(PyObject *op) {
298262#endif
299263
300264enum PyUnicode_Kind {
301- /* String contains only wstr byte characters. This is only possible
302- when the string was created with a legacy API and _PyUnicode_Ready()
303- has not been called yet. */
304- PyUnicode_WCHAR_KIND = 0 ,
305265/* Return values of the PyUnicode_KIND() function: */
306266 PyUnicode_1BYTE_KIND = 1 ,
307267 PyUnicode_2BYTE_KIND = 2 ,
@@ -459,27 +419,14 @@ PyAPI_FUNC(PyObject*) PyUnicode_New(
459419 Py_UCS4 maxchar /* maximum code point value in the string */
460420 );
461421
462- /* Initializes the canonical string representation from the deprecated
463- wstr/Py_UNICODE representation. This function is used to convert Unicode
464- objects which were created using the old API to the new flexible format
465- introduced with PEP 393.
466-
467- Don't call this function directly, use the public PyUnicode_READY() function
468- instead. */
469- PyAPI_FUNC (int ) _PyUnicode_Ready (
470- PyObject * unicode /* Unicode object */
471- );
472-
473422/* PyUnicode_READY() no longer converts anything: _PyUnicode_Ready() has
474423 been removed, so this function only asserts that the string is already
475424 in the canonical representation (PyUnicode_IS_READY(op)).
476425 Always returns 0; the former -1 error return is no longer produced. */
477426static inline int PyUnicode_READY (PyObject * op )
478427{
479- if (PyUnicode_IS_READY (op )) {
480- return 0 ;
481- }
482- return _PyUnicode_Ready (op );
428+ assert (PyUnicode_IS_READY (op ));
429+ return 0 ;
483430}
484431#if !defined(Py_LIMITED_API ) || Py_LIMITED_API + 0 < 0x030b0000
485432# define PyUnicode_READY (op ) PyUnicode_READY(_PyObject_CAST(op))
0 commit comments