@@ -8331,6 +8331,15 @@ PyUnicode_EncodeDecimal(Py_UNICODE *s,
83318331
83328332/* --- Helpers ------------------------------------------------------------ */
83338333
8334+ #include "stringlib/asciilib.h"
8335+ #include "stringlib/fastsearch.h"
8336+ #include "stringlib/partition.h"
8337+ #include "stringlib/split.h"
8338+ #include "stringlib/count.h"
8339+ #include "stringlib/find.h"
8340+ #include "stringlib/localeutil.h"
8341+ #include "stringlib/undef.h"
8342+
83348343#include "stringlib/ucs1lib.h"
83358344#include "stringlib/fastsearch.h"
83368345#include "stringlib/partition.h"
@@ -8359,7 +8368,10 @@ PyUnicode_EncodeDecimal(Py_UNICODE *s,
83598368#include "stringlib/undef.h"
83608369
83618370static Py_ssize_t
8362- any_find_slice (Py_ssize_t Py_LOCAL_CALLBACK (ucs1 )(const Py_UCS1 * , Py_ssize_t ,
8371+ any_find_slice (Py_ssize_t Py_LOCAL_CALLBACK (ascii )(const Py_UCS1 * , Py_ssize_t ,
8372+ const Py_UCS1 * , Py_ssize_t ,
8373+ Py_ssize_t , Py_ssize_t ),
8374+ Py_ssize_t Py_LOCAL_CALLBACK (ucs1 )(const Py_UCS1 * , Py_ssize_t ,
83638375 const Py_UCS1 * , Py_ssize_t ,
83648376 Py_ssize_t , Py_ssize_t ),
83658377 Py_ssize_t Py_LOCAL_CALLBACK (ucs2 )(const Py_UCS2 * , Py_ssize_t ,
@@ -8396,7 +8408,10 @@ any_find_slice(Py_ssize_t Py_LOCAL_CALLBACK(ucs1)(const Py_UCS1*, Py_ssize_t,
83968408
83978409 switch (kind ) {
83988410 case PyUnicode_1BYTE_KIND :
8399- result = ucs1 (buf1 , len1 , buf2 , len2 , start , end );
8411+ if (PyUnicode_IS_ASCII (s1 ) && PyUnicode_IS_ASCII (s2 ))
8412+ result = ascii (buf1 , len1 , buf2 , len2 , start , end );
8413+ else
8414+ result = ucs1 (buf1 , len1 , buf2 , len2 , start , end );
84008415 break ;
84018416 case PyUnicode_2BYTE_KIND :
84028417 result = ucs2 (buf1 , len1 , buf2 , len2 , start , end );
@@ -8417,7 +8432,7 @@ any_find_slice(Py_ssize_t Py_LOCAL_CALLBACK(ucs1)(const Py_UCS1*, Py_ssize_t,
84178432}
84188433
84198434Py_ssize_t
8420- _PyUnicode_InsertThousandsGrouping (int kind , void * data ,
8435+ _PyUnicode_InsertThousandsGrouping (PyObject * unicode , int kind , void * data ,
84218436 Py_ssize_t n_buffer ,
84228437 void * digits , Py_ssize_t n_digits ,
84238438 Py_ssize_t min_width ,
@@ -8426,9 +8441,14 @@ _PyUnicode_InsertThousandsGrouping(int kind, void *data,
84268441{
84278442 switch (kind ) {
84288443 case PyUnicode_1BYTE_KIND :
8429- return _PyUnicode_ucs1_InsertThousandsGrouping (
8430- (Py_UCS1 * )data , n_buffer , (Py_UCS1 * )digits , n_digits ,
8431- min_width , grouping , thousands_sep );
8444+ if (unicode != NULL && PyUnicode_IS_ASCII (unicode ))
8445+ return _PyUnicode_ascii_InsertThousandsGrouping (
8446+ (Py_UCS1 * )data , n_buffer , (Py_UCS1 * )digits , n_digits ,
8447+ min_width , grouping , thousands_sep );
8448+ else
8449+ return _PyUnicode_ucs1_InsertThousandsGrouping (
8450+ (Py_UCS1 * )data , n_buffer , (Py_UCS1 * )digits , n_digits ,
8451+ min_width , grouping , thousands_sep );
84328452 case PyUnicode_2BYTE_KIND :
84338453 return _PyUnicode_ucs2_InsertThousandsGrouping (
84348454 (Py_UCS2 * )data , n_buffer , (Py_UCS2 * )digits , n_digits ,
@@ -8505,10 +8525,16 @@ PyUnicode_Count(PyObject *str,
85058525 ADJUST_INDICES (start , end , len1 );
85068526 switch (kind ) {
85078527 case PyUnicode_1BYTE_KIND :
8508- result = ucs1lib_count (
8509- ((Py_UCS1 * )buf1 ) + start , end - start ,
8510- buf2 , len2 , PY_SSIZE_T_MAX
8511- );
8528+ if (PyUnicode_IS_ASCII (str_obj ) && PyUnicode_IS_ASCII (sub_obj ))
8529+ result = asciilib_count (
8530+ ((Py_UCS1 * )buf1 ) + start , end - start ,
8531+ buf2 , len2 , PY_SSIZE_T_MAX
8532+ );
8533+ else
8534+ result = ucs1lib_count (
8535+ ((Py_UCS1 * )buf1 ) + start , end - start ,
8536+ buf2 , len2 , PY_SSIZE_T_MAX
8537+ );
85128538 break ;
85138539 case PyUnicode_2BYTE_KIND :
85148540 result = ucs2lib_count (
@@ -8565,12 +8591,14 @@ PyUnicode_Find(PyObject *str,
85658591
85678593 if (direction > 0 )
85688594 result = any_find_slice (
8568- ucs1lib_find_slice , ucs2lib_find_slice , ucs4lib_find_slice ,
8594+ asciilib_find_slice , ucs1lib_find_slice ,
8595+ ucs2lib_find_slice , ucs4lib_find_slice ,
85698596 str , sub , start , end
85708597 );
85718598 else
85728599 result = any_find_slice (
8573- ucs1lib_rfind_slice , ucs2lib_rfind_slice , ucs4lib_rfind_slice ,
8600+ asciilib_rfind_slice , ucs1lib_rfind_slice , /* reverse search: ASCII callback must be the rfind variant like the others */
8601+ ucs2lib_rfind_slice , ucs4lib_rfind_slice ,
85748602 str , sub , start , end
85758603 );
85768604
@@ -9200,9 +9228,14 @@ PyUnicode_Splitlines(PyObject *string, int keepends)
92009228
92019229 switch (PyUnicode_KIND (string )) {
92029230 case PyUnicode_1BYTE_KIND :
9203- list = ucs1lib_splitlines (
9204- (PyObject * ) string , PyUnicode_1BYTE_DATA (string ),
9205- PyUnicode_GET_LENGTH (string ), keepends );
9231+ if (PyUnicode_IS_ASCII (string ))
9232+ list = asciilib_splitlines (
9233+ (PyObject * ) string , PyUnicode_1BYTE_DATA (string ),
9234+ PyUnicode_GET_LENGTH (string ), keepends );
9235+ else
9236+ list = ucs1lib_splitlines (
9237+ (PyObject * ) string , PyUnicode_1BYTE_DATA (string ),
9238+ PyUnicode_GET_LENGTH (string ), keepends );
92069239 break ;
92079240 case PyUnicode_2BYTE_KIND :
92089241 list = ucs2lib_splitlines (
@@ -9241,10 +9274,16 @@ split(PyObject *self,
92419274 if (substring == NULL )
92429275 switch (PyUnicode_KIND (self )) {
92439276 case PyUnicode_1BYTE_KIND :
9244- return ucs1lib_split_whitespace (
9245- (PyObject * ) self , PyUnicode_1BYTE_DATA (self ),
9246- PyUnicode_GET_LENGTH (self ), maxcount
9247- );
9277+ if (PyUnicode_IS_ASCII (self ))
9278+ return asciilib_split_whitespace (
9279+ (PyObject * ) self , PyUnicode_1BYTE_DATA (self ),
9280+ PyUnicode_GET_LENGTH (self ), maxcount
9281+ );
9282+ else
9283+ return ucs1lib_split_whitespace (
9284+ (PyObject * ) self , PyUnicode_1BYTE_DATA (self ),
9285+ PyUnicode_GET_LENGTH (self ), maxcount
9286+ );
92489287 case PyUnicode_2BYTE_KIND :
92499288 return ucs2lib_split_whitespace (
92509289 (PyObject * ) self , PyUnicode_2BYTE_DATA (self ),
@@ -9283,8 +9322,12 @@ split(PyObject *self,
92839322
92849323 switch (kind ) {
92859324 case PyUnicode_1BYTE_KIND :
9286- out = ucs1lib_split (
9287- (PyObject * ) self , buf1 , len1 , buf2 , len2 , maxcount );
9325+ if (PyUnicode_IS_ASCII (self ) && PyUnicode_IS_ASCII (substring ))
9326+ out = asciilib_split (
9327+ (PyObject * ) self , buf1 , len1 , buf2 , len2 , maxcount );
9328+ else
9329+ out = ucs1lib_split (
9330+ (PyObject * ) self , buf1 , len1 , buf2 , len2 , maxcount );
92889331 break ;
92899332 case PyUnicode_2BYTE_KIND :
92909333 out = ucs2lib_split (
@@ -9323,10 +9366,16 @@ rsplit(PyObject *self,
93239366 if (substring == NULL )
93249367 switch (PyUnicode_KIND (self )) {
93259368 case PyUnicode_1BYTE_KIND :
9326- return ucs1lib_rsplit_whitespace (
9327- (PyObject * ) self , PyUnicode_1BYTE_DATA (self ),
9328- PyUnicode_GET_LENGTH (self ), maxcount
9329- );
9369+ if (PyUnicode_IS_ASCII (self ))
9370+ return asciilib_rsplit_whitespace (
9371+ (PyObject * ) self , PyUnicode_1BYTE_DATA (self ),
9372+ PyUnicode_GET_LENGTH (self ), maxcount
9373+ );
9374+ else
9375+ return ucs1lib_rsplit_whitespace (
9376+ (PyObject * ) self , PyUnicode_1BYTE_DATA (self ),
9377+ PyUnicode_GET_LENGTH (self ), maxcount
9378+ );
93309379 case PyUnicode_2BYTE_KIND :
93319380 return ucs2lib_rsplit_whitespace (
93329381 (PyObject * ) self , PyUnicode_2BYTE_DATA (self ),
@@ -9365,8 +9414,12 @@ rsplit(PyObject *self,
93659414
93669415 switch (kind ) {
93679416 case PyUnicode_1BYTE_KIND :
9368- out = ucs1lib_rsplit (
9369- (PyObject * ) self , buf1 , len1 , buf2 , len2 , maxcount );
9417+ if (PyUnicode_IS_ASCII (self ) && PyUnicode_IS_ASCII (substring ))
9418+ out = asciilib_rsplit (
9419+ (PyObject * ) self , buf1 , len1 , buf2 , len2 , maxcount );
9420+ else
9421+ out = ucs1lib_rsplit (
9422+ (PyObject * ) self , buf1 , len1 , buf2 , len2 , maxcount );
93709423 break ;
93719424 case PyUnicode_2BYTE_KIND :
93729425 out = ucs2lib_rsplit (
@@ -9387,12 +9440,15 @@ rsplit(PyObject *self,
93879440}
93889441
93899442static Py_ssize_t
9390- anylib_find (int kind , void * buf1 , Py_ssize_t len1 ,
9391- void * buf2 , Py_ssize_t len2 , Py_ssize_t offset )
9443+ anylib_find (int kind , PyObject * str1 , void * buf1 , Py_ssize_t len1 ,
9444+ PyObject * str2 , void * buf2 , Py_ssize_t len2 , Py_ssize_t offset )
93929445{
93939446 switch (kind ) {
93949447 case PyUnicode_1BYTE_KIND :
9395- return ucs1lib_find (buf1 , len1 , buf2 , len2 , offset );
9448+ if (PyUnicode_IS_ASCII (str1 ) && PyUnicode_IS_ASCII (str2 ))
9449+ return asciilib_find (buf1 , len1 , buf2 , len2 , offset );
9450+ else
9451+ return ucs1lib_find (buf1 , len1 , buf2 , len2 , offset );
93969452 case PyUnicode_2BYTE_KIND :
93979453 return ucs2lib_find (buf1 , len1 , buf2 , len2 , offset );
93989454 case PyUnicode_4BYTE_KIND :
@@ -9403,12 +9459,15 @@ anylib_find(int kind, void *buf1, Py_ssize_t len1,
94039459}
94049460
94059461static Py_ssize_t
9406- anylib_count (int kind , void * sbuf , Py_ssize_t slen ,
9407- void * buf1 , Py_ssize_t len1 , Py_ssize_t maxcount )
9462+ anylib_count (int kind , PyObject * sstr , void * sbuf , Py_ssize_t slen ,
9463+ PyObject * str1 , void * buf1 , Py_ssize_t len1 , Py_ssize_t maxcount )
94089464{
94099465 switch (kind ) {
94109466 case PyUnicode_1BYTE_KIND :
9411- return ucs1lib_count (sbuf , slen , buf1 , len1 , maxcount );
9467+ if (PyUnicode_IS_ASCII (sstr ) && PyUnicode_IS_ASCII (str1 ))
9468+ return asciilib_count (sbuf , slen , buf1 , len1 , maxcount );
9469+ else
9470+ return ucs1lib_count (sbuf , slen , buf1 , len1 , maxcount );
94129471 case PyUnicode_2BYTE_KIND :
94139472 return ucs2lib_count (sbuf , slen , buf1 , len1 , maxcount );
94149473 case PyUnicode_4BYTE_KIND :
@@ -9497,7 +9556,7 @@ replace(PyObject *self, PyObject *str1,
94979556 if (!buf1 ) goto error ;
94989557 release1 = 1 ;
94999558 }
9500- i = anylib_find (rkind , sbuf , slen , buf1 , len1 , 0 );
9559+ i = anylib_find (rkind , self , sbuf , slen , str1 , buf1 , len1 , 0 );
95019560 if (i < 0 )
95029561 goto nothing ;
95039562 if (rkind > kind2 ) {
@@ -9530,9 +9589,9 @@ replace(PyObject *self, PyObject *str1,
95309589 i += len1 ;
95319590
95329591 while ( -- maxcount > 0 ) {
9533- i = anylib_find (rkind , sbuf + PyUnicode_KIND_SIZE ( rkind , i ) ,
9534- slen - i ,
9535- buf1 , len1 , i );
9592+ i = anylib_find (rkind , self ,
9593+ sbuf + PyUnicode_KIND_SIZE ( rkind , i ), slen - i ,
9594+ str1 , buf1 , len1 , i );
95369595 if (i == -1 )
95379596 break ;
95389597 memcpy (res + PyUnicode_KIND_SIZE (rkind , i ),
@@ -9557,7 +9616,7 @@ replace(PyObject *self, PyObject *str1,
95579616 if (!buf1 ) goto error ;
95589617 release1 = 1 ;
95599618 }
9560- n = anylib_count (rkind , sbuf , slen , buf1 , len1 , maxcount );
9619+ n = anylib_count (rkind , self , sbuf , slen , str1 , buf1 , len1 , maxcount );
95619620 if (n == 0 )
95629621 goto nothing ;
95639622 if (kind2 < rkind ) {
@@ -9596,9 +9655,9 @@ replace(PyObject *self, PyObject *str1,
95969655 if (len1 > 0 ) {
95979656 while (n -- > 0 ) {
95989657 /* look for next match */
9599- j = anylib_find (rkind ,
9600- sbuf + PyUnicode_KIND_SIZE (rkind , i ),
9601- slen - i , buf1 , len1 , i );
9658+ j = anylib_find (rkind , self ,
9659+ sbuf + PyUnicode_KIND_SIZE (rkind , i ), slen - i ,
9660+ str1 , buf1 , len1 , i );
96029661 if (j == -1 )
96039662 break ;
96049663 else if (j > i ) {
@@ -10443,7 +10502,8 @@ unicode_find(PyObject *self, PyObject *args)
1044310502 return NULL ;
1044410503
1044510504 result = any_find_slice (
10446- ucs1lib_find_slice , ucs2lib_find_slice , ucs4lib_find_slice ,
10505+ asciilib_find_slice , ucs1lib_find_slice ,
10506+ ucs2lib_find_slice , ucs4lib_find_slice ,
1044710507 self , (PyObject * )substring , start , end
1044810508 );
1044910509
@@ -10536,7 +10596,8 @@ unicode_index(PyObject *self, PyObject *args)
1053610596 return NULL ;
1053710597
1053810598 result = any_find_slice (
10539- ucs1lib_find_slice , ucs2lib_find_slice , ucs4lib_find_slice ,
10599+ asciilib_find_slice , ucs1lib_find_slice ,
10600+ ucs2lib_find_slice , ucs4lib_find_slice ,
1054010601 self , (PyObject * )substring , start , end
1054110602 );
1054210603
@@ -11548,7 +11609,8 @@ unicode_rfind(PyObject *self, PyObject *args)
1154811609 return NULL ;
1154911610
1155011611 result = any_find_slice (
11551- ucs1lib_rfind_slice , ucs2lib_rfind_slice , ucs4lib_rfind_slice ,
11612+ asciilib_rfind_slice , ucs1lib_rfind_slice ,
11613+ ucs2lib_rfind_slice , ucs4lib_rfind_slice ,
1155211614 self , (PyObject * )substring , start , end
1155311615 );
1155411616
@@ -11583,7 +11645,8 @@ unicode_rindex(PyObject *self, PyObject *args)
1158311645 return NULL ;
1158411646
1158511647 result = any_find_slice (
11586- ucs1lib_rfind_slice , ucs2lib_rfind_slice , ucs4lib_rfind_slice ,
11648+ asciilib_rfind_slice , ucs1lib_rfind_slice ,
11649+ ucs2lib_rfind_slice , ucs4lib_rfind_slice ,
1158711650 self , (PyObject * )substring , start , end
1158811651 );
1158911652
@@ -11712,7 +11775,10 @@ PyUnicode_Partition(PyObject *str_in, PyObject *sep_in)
1171211775
1171311776 switch (PyUnicode_KIND (str_obj )) {
1171411777 case PyUnicode_1BYTE_KIND :
11715- out = ucs1lib_partition (str_obj , buf1 , len1 , sep_obj , buf2 , len2 );
11778+ if (PyUnicode_IS_ASCII (str_obj ) && PyUnicode_IS_ASCII (sep_obj ))
11779+ out = asciilib_partition (str_obj , buf1 , len1 , sep_obj , buf2 , len2 );
11780+ else
11781+ out = ucs1lib_partition (str_obj , buf1 , len1 , sep_obj , buf2 , len2 );
1171611782 break ;
1171711783 case PyUnicode_2BYTE_KIND :
1171811784 out = ucs2lib_partition (str_obj , buf1 , len1 , sep_obj , buf2 , len2 );
@@ -11781,7 +11847,10 @@ PyUnicode_RPartition(PyObject *str_in, PyObject *sep_in)
1178111847
1178211848 switch (PyUnicode_KIND (str_in )) {
1178311849 case PyUnicode_1BYTE_KIND :
11784- out = ucs1lib_rpartition (str_obj , buf1 , len1 , sep_obj , buf2 , len2 );
11850+ if (PyUnicode_IS_ASCII (str_obj ) && PyUnicode_IS_ASCII (sep_obj ))
11851+ out = asciilib_rpartition (str_obj , buf1 , len1 , sep_obj , buf2 , len2 );
11852+ else
11853+ out = ucs1lib_rpartition (str_obj , buf1 , len1 , sep_obj , buf2 , len2 );
1178511854 break ;
1178611855 case PyUnicode_2BYTE_KIND :
1178711856 out = ucs2lib_rpartition (str_obj , buf1 , len1 , sep_obj , buf2 , len2 );
0 commit comments