@@ -11,6 +11,257 @@ extern "C" {
1111#include  "pycore_fileutils.h"      // _Py_error_handler 
1212#include  "pycore_ucnhash.h"        // _PyUnicode_Name_CAPI 
1313
/* Largest valid Unicode code point as of Unicode 6.0: U+10FFFF
   (1,114,111 in decimal).  fileutils.c must use the same value. */
#define _Py_MAX_UNICODE 0x10ffff

/* Strip-mode selectors.
   NOTE(review): presumably the values passed as the "striptype" argument of
   _PyUnicode_do_strip() -- confirm against the callers. */
#define _Py_LEFTSTRIP 0
#define _Py_RIGHTSTRIP 1
#define _Py_BOTHSTRIP 2
21+ 
22+ extern  int  _PyUnicode_CheckEncodingErrors (
23+     const  char  * encoding ,
24+     const  char  * errors );
25+ extern  PyObject *  _PyUnicode_GetEmpty (void );
26+ extern  PyObject *  _PyUnicode_Result (PyObject  * unicode );
27+ extern  PyObject *  _PyUnicode_ResultUnchanged (PyObject  * unicode );
28+ extern  Py_ssize_t  _PyUnicode_FindChar (
29+     const  void  * s ,
30+     int  kind ,
31+     Py_ssize_t  size ,
32+     Py_UCS4  ch ,
33+     int  direction );
34+ extern  PyObject *  _PyUnicode_GetLatin1Char (Py_UCS1  ch );
35+ extern  char *  PyUnicode_UTF8 (PyObject  * op );
36+ extern  Py_ssize_t  PyUnicode_UTF8_LENGTH (PyObject  * op );
37+ extern  void  _PyUnicode_SET_UTF8 (PyObject  * op , char  * utf8 );
38+ extern  void  _PyUnicode_SET_UTF8_LENGTH (PyObject  * op , Py_ssize_t  length );
39+ extern  PyObject *  _PyUnicode_FromUCS1 (const  Py_UCS1 *  u , Py_ssize_t  size );
40+ extern  PyObject *  _PyUnicode_TranslateCharmap (
41+     PyObject  * input ,
42+     PyObject  * mapping ,
43+     const  char  * errors );
44+ extern  int  _PyUnicode_FillUTF8 (PyObject  * unicode );
45+ extern  int  _PyUnicode_DecodeUTF8Writer (
46+     _PyUnicodeWriter  * writer ,
47+     const  char  * s ,
48+     Py_ssize_t  size ,
49+     _Py_error_handler  error_handler ,
50+     const  char  * errors ,
51+     Py_ssize_t  * consumed );
52+ extern  int  _Py_normalize_encoding (const  char  * , char  * , size_t );
53+ extern  void *  _PyUnicode_AsKind (
54+     int  skind ,
55+     void  const  * data ,
56+     Py_ssize_t  len ,
57+     int  kind );
58+ extern  int  _PyUnicode_Tailmatch (
59+     PyObject  * self ,
60+     PyObject  * substring ,
61+     Py_ssize_t  start ,
62+     Py_ssize_t  end ,
63+     int  direction );
64+ extern  Py_ssize_t  _PyUnicode_Count (
65+     PyObject  * str ,
66+     PyObject  * substr ,
67+     Py_ssize_t  start ,
68+     Py_ssize_t  end );
69+ extern  PyObject  *  _PyUnicode_Replace (
70+     PyObject  * self ,
71+     PyObject  * str1 ,
72+     PyObject  * str2 ,
73+     Py_ssize_t  maxcount );
74+ extern  Py_ssize_t  _PyUnicode_AnylibFindSlice (
75+     PyObject *  s1 ,
76+     PyObject *  s2 ,
77+     Py_ssize_t  start ,
78+     Py_ssize_t  end ,
79+     int  direction );
80+ extern  int  _PyUnicode_FindMaxCharSurrogates (
81+     const  wchar_t  * begin ,
82+     const  wchar_t  * end ,
83+     Py_UCS4  * maxchar ,
84+     Py_ssize_t  * num_surrogates );
85+ extern  void  _PyUnicode_WriteWideChar (
86+     int  kind ,
87+     void  * data ,
88+     const  wchar_t  * u ,
89+     Py_ssize_t  size ,
90+     Py_ssize_t  num_surrogates );
91+ extern  PyObject *  _PyUnicode_FromUCS2 (const  Py_UCS2  * u , Py_ssize_t  size );
92+ extern  PyObject *  _PyUnicode_FromUCS4 (const  Py_UCS4  * u , Py_ssize_t  size );
93+ extern  PyObject *  _PyUnicode_FromOrdinal (Py_UCS4  ordinal );
94+ extern  PyObject *  _PyUnicode_do_string_format (
95+     PyObject  * self ,
96+     PyObject  * args ,
97+     PyObject  * kwargs );
98+ extern  PyObject *  _PyUnicode_do_string_format_map (
99+     PyObject  * self ,
100+     PyObject  * obj );
101+ extern  Py_hash_t  _PyUnicode_Hash (PyObject  * self );
102+ extern  PyObject *  _PyUnicode_Iter (PyObject  * seq );
103+ extern  int  _PyUnicode_IsModifiable (PyObject  * unicode );
104+ extern  void  _PyUnicode_Fill (
105+     int  kind ,
106+     void  * data ,
107+     Py_UCS4  value ,
108+     Py_ssize_t  start ,
109+     Py_ssize_t  length );
110+ extern  PyObject *  _PyUnicode_ResizeCompact (
111+     PyObject  * unicode ,
112+     Py_ssize_t  length );
113+ extern  int  _PyUnicode_CheckModifiable (PyObject  * unicode );
114+ extern  PyObject *  _PyUnicode_Repr (PyObject  * unicode );
115+ extern  PyObject *  _PyUnicode_Pad (
116+     PyObject  * self ,
117+     Py_ssize_t  left ,
118+     Py_ssize_t  right ,
119+     Py_UCS4  fill );
120+ extern  int  _PyUnicode_Resize (PyObject  * * p_unicode , Py_ssize_t  length );
121+ extern  PyObject *  _PyUnicode_EncodeUTF8 (
122+     PyObject  * unicode ,
123+     _Py_error_handler  error_handler ,
124+     const  char  * errors );
125+ extern  PyObject *  _PyUnicode_DecodeUTF8 (
126+     const  char  * s ,
127+     Py_ssize_t  size ,
128+     _Py_error_handler  error_handler ,
129+     const  char  * errors ,
130+     Py_ssize_t  * consumed );
131+ extern  char *  _PyUnicode_Backslashreplace (
132+     PyBytesWriter  * writer ,
133+     char  * str ,
134+     PyObject  * unicode ,
135+     Py_ssize_t  collstart ,
136+     Py_ssize_t  collend );
137+ extern  char *  _PyUnicode_Xmlcharrefreplace (
138+     PyBytesWriter  * writer ,
139+     char  * str ,
140+     PyObject  * unicode ,
141+     Py_ssize_t  collstart ,
142+     Py_ssize_t  collend );
143+ extern  PyObject *  _PyUnicode_EncodeCallErrorHandler (
144+     const  char  * errors ,
145+     PyObject  * * errorHandler ,
146+     const  char  * encoding ,
147+     const  char  * reason ,
148+     PyObject  * unicode ,
149+     PyObject  * * exceptionObject ,
150+     Py_ssize_t  startpos ,
151+     Py_ssize_t  endpos ,
152+     Py_ssize_t  * newpos );
153+ extern  void  _PyUnicode_RaiseEncodeException (
154+     PyObject  * * exceptionObject ,
155+     const  char  * encoding ,
156+     PyObject  * unicode ,
157+     Py_ssize_t  startpos ,
158+     Py_ssize_t  endpos ,
159+     const  char  * reason );
160+ extern  int  _PyUnicode_DecodeCallErrorHandlerWriter (
161+     const  char  * errors ,
162+     PyObject  * * errorHandler ,
163+     const  char  * encoding ,
164+     const  char  * reason ,
165+     const  char  * * input ,
166+     const  char  * * inend ,
167+     Py_ssize_t  * startinpos ,
168+     Py_ssize_t  * endinpos ,
169+     PyObject  * * exceptionObject ,
170+     const  char  * * inptr ,
171+     _PyUnicodeWriter  * writer );
172+ extern  PyObject *  _PyUnicode_EncodeUCS1 (
173+     PyObject  * unicode ,
174+     const  char  * errors ,
175+     const  Py_UCS4  limit );
176+ extern  void  _PyUnicode_InitGlobalState (void );
177+ extern  PyObject *  _PyUnicode_do_strip (PyObject  * self , int  striptype );
178+ extern  PyObject *  _PyUnicode_Split (
179+     PyObject  * self ,
180+     PyObject  * substring ,
181+     Py_ssize_t  maxcount );
182+ extern  PyObject *  _PyUnicode_RSplit (
183+     PyObject  * self ,
184+     PyObject  * substring ,
185+     Py_ssize_t  maxcount );
186+ extern  PyObject *  _PyUnicode_Maketrans (
187+     PyObject  * x ,
188+     PyObject  * y ,
189+     PyObject  * z );
190+ extern  PyObject *  _PyUnicode_Expandtabs (
191+     PyObject  * self ,
192+     int  tabsize );
193+ void  _PyUnicode_MakeDecodeException (
194+     PyObject  * * exceptionObject ,
195+     const  char  * encoding ,
196+     const  char  * input , Py_ssize_t  length ,
197+     Py_ssize_t  startpos , Py_ssize_t  endpos ,
198+     const  char  * reason );
199+ 
200+ extern  PyTypeObject  _Py_EncodingMapType ;
201+ extern  PyTypeObject  _Py_FieldNameIter_Type ;
202+ extern  PyTypeObject  _Py_FormatterIter_Type ;
203+ 
/* Helper macro to fix up start/end slice values against a length.

   start and end must be modifiable lvalues; both are updated in place:
     - an end greater than len is reduced to len;
     - a negative start or end has len added to it and, if it is still
       negative, is set to 0.
   start is never compared against len, so it can remain greater than len
   (or greater than end) after the macro runs.

   All three arguments may be evaluated more than once, so they must be
   free of side effects.  Every use is parenthesized so that an argument
   written as a low-precedence expression (e.g. "cond ? a : b" for len)
   is not re-associated with the surrounding operators. */
#define _Py_ADJUST_INDICES(start, end, len) \
    do {                                    \
        if ((end) > (len)) {                \
            (end) = (len);                  \
        }                                   \
        else if ((end) < 0) {               \
            (end) += (len);                 \
            if ((end) < 0) {                \
                (end) = 0;                  \
            }                               \
        }                                   \
        if ((start) < 0) {                  \
            (start) += (len);               \
            if ((start) < 0) {              \
                (start) = 0;                \
            }                               \
        }                                   \
    } while (0)
223+ 
/* Generic helper macro that copies a run of characters while converting
   each one from from_type to to_type.

   from_type and to_type have to be valid type names.  begin and end are
   pointers of type "from_type *" delimiting the source characters (end is
   exclusive).  to is a pointer of type "to_type *" to the buffer that
   receives one converted character per source character.

   The copy proceeds four characters per iteration for as long as a full
   group of four remains; the leftover characters are copied one by one. */
#define _PyUnicode_CONVERT_BYTES(from_type, to_type, begin, end, to) \
    do {                                                             \
        to_type *_to = (to_type *)(to);                              \
        const from_type *_iter = (const from_type *)(begin);         \
        const from_type *_end = (const from_type *)(end);            \
        Py_ssize_t _n = (_end) - (_iter);                            \
        const from_type *_unrolled_end =                             \
            _iter + _Py_SIZE_ROUND_DOWN(_n, 4);                      \
        for (; _iter < (_unrolled_end); _iter += 4, _to += 4) {      \
            _to[0] = (to_type) _iter[0];                             \
            _to[1] = (to_type) _iter[1];                             \
            _to[2] = (to_type) _iter[2];                             \
            _to[3] = (to_type) _iter[3];                             \
        }                                                            \
        for (; _iter < (_end); _iter++, _to++) {                     \
            *_to = (to_type) *_iter;                                 \
        }                                                            \
    } while (0)
247+ 
/* _PyUnicode_CHECK(op): expands to PyUnicode_Check(op) unless Py_DEBUG is
   defined, in which case it expands to _PyUnicode_CheckConsistency(op, 0). */
#ifndef Py_DEBUG
#  define _PyUnicode_CHECK(op) PyUnicode_Check(op)
#else
#  define _PyUnicode_CHECK(op) _PyUnicode_CheckConsistency(op, 0)
#endif
253+ 
254+ static  inline  int 
255+ _PyUnicode_Ensure (PyObject  * obj )
256+ {
257+     if  (!PyUnicode_Check (obj )) {
258+         PyErr_Format (PyExc_TypeError , "must be str, not %T" , obj );
259+         return  -1 ;
260+     }
261+     return  0 ;
262+ }
263+ 
264+ 
14265/* --- Characters Type APIs ----------------------------------------------- */ 
15266
16267extern  int  _PyUnicode_IsXidStart (Py_UCS4  ch );
@@ -73,6 +324,17 @@ extern Py_UCS4 _PyUnicode_FindMaxChar (
73324
74325/* --- _PyUnicodeWriter API ----------------------------------------------- */ 
75326
327+ static  inline  int 
328+ _PyUnicodeWriter_WriteCharInline (_PyUnicodeWriter  * writer , Py_UCS4  ch )
329+ {
330+     assert (ch  <= _Py_MAX_UNICODE );
331+     if  (_PyUnicodeWriter_Prepare (writer , 1 , ch ) <  0 )
332+         return  -1 ;
333+     PyUnicode_WRITE (writer -> kind , writer -> data , writer -> pos , ch );
334+     writer -> pos ++ ;
335+     return  0 ;
336+ }
337+ 
76338/* Format the object based on the format_spec, as defined in PEP 3101 
77339   (Advanced String Formatting). */ 
78340extern  int  _PyUnicode_FormatAdvancedWriter (
@@ -88,6 +350,10 @@ extern int _PyUnicodeWriter_FormatV(
88350    const  char  * format ,
89351    va_list  vargs );
90352
353+ extern  void  _PyUnicodeWriter_InitWithBuffer (
354+     _PyUnicodeWriter  * writer ,
355+     PyObject  * buffer );
356+ 
91357/* --- UTF-7 Codecs ------------------------------------------------------- */ 
92358
93359extern  PyObject *  _PyUnicode_EncodeUTF7 (
0 commit comments