@@ -5090,52 +5090,98 @@ ascii_decode(const char *start, const char *end, Py_UCS1 *dest)
50905090 return p - start ;
50915091}
50925092
5093- static Py_ssize_t
5094- find_first_nonascii (const char * start , const char * end )
5095- {
5096- const char * p = start ;
5097-
5098- while (p < end ) {
5099- /* Fast path, see in STRINGLIB(utf8_decode) in stringlib/codecs.h
5100- for an explanation. */
5101- if (_Py_IS_ALIGNED (p , ALIGNOF_SIZE_T )) {
5102- const char * e = end - SIZEOF_SIZE_T ;
5103- while (p <= e ) {
5104- size_t value = (* (const size_t * )p ) & ASCII_CHAR_MASK ;
5105- if (value ) {
5106- #if PY_LITTLE_ENDIAN && (defined(__clang__ ) || defined(__GNUC__ ))
5107- #if SIZEOF_SIZE_T == 4
5108- // __builtin_ctz(0x8000) == 15.
5109- // (15-7) / 8 == 1.
5110- // p+1 is first non-ASCII char.
5111- return p - start + (__builtin_ctz (value ) - 7 ) / 8 ;
5112- #else
5113- return p - start + (__builtin_ctzll (value ) - 7 ) / 8 ;
5114- #endif
5115- #elif PY_LITTLE_ENDIAN && defined(_MSC_VER )
5116- unsigned long bitpos ;
/* ctz(v): index of the least significant set bit of v.
 *
 * The result is undefined when v == 0 (both the GCC/Clang builtin and the
 * MSVC intrinsic leave it unspecified), so callers must test v first.
 *
 * HAS_CTZ is always defined: 1 when ctz() is available, 0 otherwise, so it
 * can be used directly in #if expressions without relying on the
 * "undefined macro evaluates to 0" rule (and without -Wundef warnings).
 */
#if (defined(__clang__) || defined(__GNUC__))
#define HAS_CTZ 1
static inline unsigned int
ctz(size_t v)
{
    // Widening to unsigned long long keeps one code path for 32-bit and
    // 64-bit size_t; the added high bits are zero, so the result is the same.
    return (unsigned int)__builtin_ctzll((unsigned long long)v);
}
#elif defined(_MSC_VER)
#define HAS_CTZ 1
static inline unsigned int
ctz(size_t v)
{
    unsigned long pos;
#if SIZEOF_SIZE_T == 4
    _BitScanForward(&pos, v);
#else
    _BitScanForward64(&pos, v);
#endif /* SIZEOF_SIZE_T */
    return (unsigned int)pos;
}
#else
#define HAS_CTZ 0
#endif
5122- return p - start + (bitpos - 7 ) / 8 ;
5110+
5111+ static Py_ssize_t
5112+ find_first_nonascii (const unsigned char * start , const unsigned char * end )
5113+ {
5114+ const unsigned char * p = start ;
5115+
5116+ if (end - start > SIZEOF_SIZE_T + ALIGNOF_SIZE_T ) {
5117+ while (!_Py_IS_ALIGNED (p , ALIGNOF_SIZE_T )) {
5118+ if ((unsigned char )* p & 0x80 ) {
5119+ return p - start ;
5120+ }
5121+ p ++ ;
5122+ }
5123+ const unsigned char * e = end - SIZEOF_SIZE_T ;
5124+ while (p <= e ) {
5125+ size_t value = (* (const size_t * )p ) & ASCII_CHAR_MASK ;
5126+ if (value ) {
5127+ #if PY_LITTLE_ENDIAN && HAS_CTZ
5128+ return p - start + (ctz (value ) - 7 ) / 8 ;
51235129#else
5124- // big endian and minor compilers are difficult to test.
5125- // fallback to per byte check.
5126- break ;
5130+ // big endian and minor compilers are difficult to test.
5131+ // fallback to per byte check.
5132+ break ;
51275133#endif
5128- }
5129- p += SIZEOF_SIZE_T ;
51305134 }
5131- if (p == end )
5135+ p += SIZEOF_SIZE_T ;
5136+ }
5137+ }
5138+ #if HAS_CTZ
5139+ // This part looks bit tricky, but decoding short ASCII is super important.
5140+ // Since we copy from p to size_t manually, this part works fine with big endian.
5141+ while (p < end ) {
5142+ size_t u = (size_t )(p [0 ]);
5143+ switch (end - p ) {
5144+ default :
5145+ #if SIZEOF_SIZE_T == 8
5146+ u |= (size_t )(p [7 ]) << 56ull ;
5147+ // fall through
5148+ case 7 :
5149+ u |= (size_t )(p [6 ]) << 48ull ;
5150+ // fall through
5151+ case 6 :
5152+ u |= (size_t )(p [5 ]) << 40ull ;
5153+ // fall through
5154+ case 5 :
5155+ u |= (size_t )(p [4 ]) << 32ull ;
5156+ // fall through
5157+ case 4 :
5158+ #endif
5159+ u |= (size_t )(p [3 ]) << 24 ;
5160+ // fall through
5161+ case 3 :
5162+ u |= (size_t )(p [2 ]) << 16 ;
5163+ // fall through
5164+ case 2 :
5165+ u |= (size_t )(p [1 ]) << 8 ;
5166+ break ;
5167+ case 1 :
51325168 break ;
51335169 }
5134- if ((unsigned char )* p & 0x80 )
5170+ if (u & ASCII_CHAR_MASK ) {
5171+ return p - start + (ctz (u & ASCII_CHAR_MASK ) - 7 ) / 8 ;
5172+ }
5173+ p += SIZEOF_SIZE_T ;
5174+ }
5175+ return end - start ;
5176+ #else
5177+ while (p < end ) {
5178+ if ((unsigned char )* p & 0x80 ) {
51355179 break ;
5136- ++ p ;
5180+ }
5181+ p ++ ;
51375182 }
51385183 return p - start ;
5184+ #endif
51395185}
51405186
51415187static inline int scalar_utf8_start_char (unsigned int ch )
@@ -5153,7 +5199,7 @@ static Py_ssize_t utf8_count_codepoints(const unsigned char *s, const unsigned c
51535199{
51545200 Py_ssize_t len = 0 ;
51555201
5156- if (end - s > SIZEOF_SIZE_T * 2 ) {
5202+ if (end - s > SIZEOF_SIZE_T + ALIGNOF_SIZE_T ) {
51575203 while (!_Py_IS_ALIGNED (s , ALIGNOF_SIZE_T )) {
51585204 len += scalar_utf8_start_char (* s ++ );
51595205 }
@@ -5337,7 +5383,7 @@ unicode_decode_utf8(const char *s, Py_ssize_t size,
53375383 const char * starts = s ;
53385384 const char * end = s + size ;
53395385
5340- Py_ssize_t pos = find_first_nonascii (starts , end );
5386+ Py_ssize_t pos = find_first_nonascii (( const unsigned char * ) starts , ( const unsigned char * ) end );
53415387 if (pos == size ) { // fast path: ASCII string.
53425388 PyObject * u = ascii_new (size );
53435389 if (u == NULL ) {
@@ -5355,7 +5401,7 @@ unicode_decode_utf8(const char *s, Py_ssize_t size,
53555401 int maxchr = 127 ;
53565402 Py_ssize_t maxsize = size ;
53575403
5358- unsigned char ch = (unsigned char )s [pos ];
5404+ unsigned char ch = (unsigned char )( s [pos ]) ;
53595405 // error handler other than strict may remove/replace the invalid byte.
53605406 // consumed != NULL allows 1~3 bytes remainings.
53615407 // 0x80 <= ch < 0xc2 is invalid start byte that cause UnicodeDecodeError.
0 commit comments