@@ -5036,70 +5036,15 @@ PyUnicode_DecodeUTF8(const char *s,
50365036# error C 'size_t' size should be either 4 or 8!
50375037#endif
50385038
5039- static Py_ssize_t
5040- ascii_decode (const char * start , const char * end , Py_UCS1 * dest )
5041- {
5042- const char * p = start ;
5043-
5044- #if SIZEOF_SIZE_T <= SIZEOF_VOID_P
5045- if (_Py_IS_ALIGNED (p , ALIGNOF_SIZE_T )
5046- && _Py_IS_ALIGNED (dest , ALIGNOF_SIZE_T ))
5047- {
5048- /* Fast path, see in STRINGLIB(utf8_decode) for
5049- an explanation. */
5050- /* Help allocation */
5051- const char * _p = p ;
5052- Py_UCS1 * q = dest ;
5053- while (_p + SIZEOF_SIZE_T <= end ) {
5054- size_t value = * (const size_t * ) _p ;
5055- if (value & ASCII_CHAR_MASK )
5056- break ;
5057- * ((size_t * )q ) = value ;
5058- _p += SIZEOF_SIZE_T ;
5059- q += SIZEOF_SIZE_T ;
5060- }
5061- p = _p ;
5062- while (p < end ) {
5063- if ((unsigned char )* p & 0x80 )
5064- break ;
5065- * q ++ = * p ++ ;
5066- }
5067- return p - start ;
5068- }
5069- #endif
5070- while (p < end ) {
5071- /* Fast path, see in STRINGLIB(utf8_decode) in stringlib/codecs.h
5072- for an explanation. */
5073- if (_Py_IS_ALIGNED (p , ALIGNOF_SIZE_T )) {
5074- /* Help allocation */
5075- const char * _p = p ;
5076- while (_p + SIZEOF_SIZE_T <= end ) {
5077- size_t value = * (const size_t * ) _p ;
5078- if (value & ASCII_CHAR_MASK )
5079- break ;
5080- _p += SIZEOF_SIZE_T ;
5081- }
5082- p = _p ;
5083- if (_p == end )
5084- break ;
5085- }
5086- if ((unsigned char )* p & 0x80 )
5087- break ;
5088- ++ p ;
5089- }
5090- memcpy (dest , start , p - start );
5091- return p - start ;
5092- }
5093-
50945039#if (defined(__clang__ ) || defined(__GNUC__ ))
5095- #define HAS_CTZ 1
5040+ #define HAVE_CTZ 1
// Count the trailing zero bits of v (GCC/Clang variant).
// v is widened to unsigned long long and handed to __builtin_ctzll.
// NOTE(review): the builtin is documented as undefined for a zero argument;
// the calls visible in this hunk all sit behind a non-zero check -- confirm
// that any other caller does the same.
50965041static inline unsigned int
50975042ctz (size_t v )
50985043{
50995044 return __builtin_ctzll ((unsigned long long )v );
51005045}
51015046#elif defined(_MSC_VER )
5102- #define HAS_CTZ 1
5047+ #define HAVE_CTZ 1
51035048static inline unsigned int
51045049ctz (size_t v )
51055050{
@@ -5113,24 +5058,79 @@ ctz(size_t v)
51135058}
51145059#endif
51155060
#if HAVE_CTZ
// Load p[0]..p[size-1] into a size_t as a little-endian value: p[0] becomes
// the least significant byte and every byte at index >= size is zero.
//
// Only p[0]..p[size-1] are read (no read-ahead) and they are read one byte
// at a time, so p does not have to be aligned.  size must not exceed
// SIZEOF_SIZE_T.
//
// The value is assembled with shifts instead of through a byte-array union
// so the result does not depend on the host byte order.  Callers pass the
// masked result to ctz() to locate the first byte with its high bit set,
// which only works when p[0] is the least significant byte.
static size_t
load_unaligned(const unsigned char *p, size_t size)
{
    // size is unsigned, so only the upper bound can be violated.
    assert(size <= SIZEOF_SIZE_T);
    size_t u = 0;
    switch (size) {
#if SIZEOF_SIZE_T == 8
    // Bytes 4..7 only exist in a 64-bit size_t; compiling these cases on a
    // 32-bit build would shift by more than the width of the type.
    case 8:
        u |= (size_t)p[7] << 56;
        // fall through
    case 7:
        u |= (size_t)p[6] << 48;
        // fall through
    case 6:
        u |= (size_t)p[5] << 40;
        // fall through
    case 5:
        u |= (size_t)p[4] << 32;
        // fall through
#endif
    case 4:
        u |= (size_t)p[3] << 24;
        // fall through
    case 3:
        u |= (size_t)p[2] << 16;
        // fall through
    case 2:
        u |= (size_t)p[1] << 8;
        // fall through
    case 1:
        u |= (size_t)p[0];
        break;
    case 0:
        break;
    default:
        Py_UNREACHABLE();
    }
    return u;
}
#endif
5106+
51165107static Py_ssize_t
51175108find_first_nonascii (const unsigned char * start , const unsigned char * end )
51185109{
51195110 const unsigned char * p = start ;
51205111
5121- if (end - start > SIZEOF_SIZE_T + ALIGNOF_SIZE_T ) {
5122- while (!_Py_IS_ALIGNED (p , ALIGNOF_SIZE_T )) {
5123- if ((unsigned char )* p & 0x80 ) {
5112+ if (end - start >= SIZEOF_SIZE_T ) {
5113+ const unsigned char * p2 = _Py_ALIGN_UP (p , SIZEOF_SIZE_T );
5114+ #ifdef HAVE_CTZ
5115+ size_t u = load_unaligned (p , p2 - p ) & ASCII_CHAR_MASK ;
5116+ if (u ) {
5117+ return p - start + (ctz (u ) - 7 ) / 8 ;
5118+ }
5119+ p = p2 ;
5120+ #else
5121+ while (p < p2 ) {
5122+ if (* p & 0x80 ) {
51245123 return p - start ;
51255124 }
51265125 p ++ ;
51275126 }
5127+ #endif
51285128 const unsigned char * e = end - SIZEOF_SIZE_T ;
51295129 while (p <= e ) {
5130- size_t value = (* (const size_t * )p ) & ASCII_CHAR_MASK ;
5131- if (value ) {
5132- #if PY_LITTLE_ENDIAN && HAS_CTZ
5133- return p - start + (ctz (value ) - 7 ) / 8 ;
5130+ size_t u = (* (const size_t * )p ) & ASCII_CHAR_MASK ;
5131+ if (u ) {
5132+ #if PY_LITTLE_ENDIAN && HAVE_CTZ
5133+ return p - start + (ctz (u ) - 7 ) / 8 ;
51345134#else
51355135 // big endian and minor compilers are difficult to test.
51365136 // fallback to per byte check.
@@ -5140,47 +5140,15 @@ find_first_nonascii(const unsigned char *start, const unsigned char *end)
51405140 p += SIZEOF_SIZE_T ;
51415141 }
51425142 }
5143- #if HAS_CTZ
5144- // This part looks bit tricky, but decoding short ASCII is super important.
5145- // Since we copy from p to size_t manually, this part works fine with big endian.
5146- while (p < end ) {
5147- size_t u = (size_t )(p [0 ]);
5148- switch (end - p ) {
5149- default :
5150- #if SIZEOF_SIZE_T == 8
5151- u |= (size_t )(p [7 ]) << 56ull ;
5152- // fall through
5153- case 7 :
5154- u |= (size_t )(p [6 ]) << 48ull ;
5155- // fall through
5156- case 6 :
5157- u |= (size_t )(p [5 ]) << 40ull ;
5158- // fall through
5159- case 5 :
5160- u |= (size_t )(p [4 ]) << 32ull ;
5161- // fall through
5162- case 4 :
5163- #endif
5164- u |= (size_t )(p [3 ]) << 24 ;
5165- // fall through
5166- case 3 :
5167- u |= (size_t )(p [2 ]) << 16 ;
5168- // fall through
5169- case 2 :
5170- u |= (size_t )(p [1 ]) << 8 ;
5171- break ;
5172- case 1 :
5173- break ;
5174- }
5175- if (u & ASCII_CHAR_MASK ) {
5176- return p - start + (ctz (u & ASCII_CHAR_MASK ) - 7 ) / 8 ;
5177- }
5178- p += SIZEOF_SIZE_T ;
5143+ #if HAVE_CTZ
5144+ size_t u = load_unaligned (p , end - p ) & ASCII_CHAR_MASK ;
5145+ if (u ) {
5146+ return p - start + (ctz (u ) - 7 ) / 8 ;
51795147 }
51805148 return end - start ;
51815149#else
51825150 while (p < end ) {
5183- if (( unsigned char ) * p & 0x80 ) {
5151+ if (* p & 0x80 ) {
51845152 break ;
51855153 }
51865154 p ++ ;
@@ -5204,7 +5172,7 @@ static Py_ssize_t utf8_count_codepoints(const unsigned char *s, const unsigned c
52045172{
52055173 Py_ssize_t len = 0 ;
52065174
5207- if (end - s > SIZEOF_SIZE_T + ALIGNOF_SIZE_T ) {
5175+ if (end - s >= SIZEOF_SIZE_T ) {
52085176 while (!_Py_IS_ALIGNED (s , ALIGNOF_SIZE_T )) {
52095177 len += scalar_utf8_start_char (* s ++ );
52105178 }
@@ -5235,6 +5203,39 @@ static Py_ssize_t utf8_count_codepoints(const unsigned char *s, const unsigned c
52355203 return len ;
52365204}
52375205
5206+ static Py_ssize_t
5207+ ascii_decode (const char * start , const char * end , Py_UCS1 * dest )
5208+ {
5209+ #if SIZEOF_SIZE_T <= SIZEOF_VOID_P
5210+ if (_Py_IS_ALIGNED (start , ALIGNOF_SIZE_T )
5211+ && _Py_IS_ALIGNED (dest , ALIGNOF_SIZE_T ))
5212+ {
5213+ /* Fast path, see in STRINGLIB(utf8_decode) for
5214+ an explanation. */
5215+ const char * p = start ;
5216+ Py_UCS1 * q = dest ;
5217+ while (p + SIZEOF_SIZE_T <= end ) {
5218+ size_t value = * (const size_t * ) p ;
5219+ if (value & ASCII_CHAR_MASK )
5220+ break ;
5221+ * ((size_t * )q ) = value ;
5222+ p += SIZEOF_SIZE_T ;
5223+ q += SIZEOF_SIZE_T ;
5224+ }
5225+ while (p < end ) {
5226+ if ((unsigned char )* p & 0x80 )
5227+ break ;
5228+ * q ++ = * p ++ ;
5229+ }
5230+ return p - start ;
5231+ }
5232+ #endif
5233+ Py_ssize_t pos = find_first_nonascii ((const unsigned char * )start ,
5234+ (const unsigned char * )end );
5235+ memcpy (dest , start , pos );
5236+ return pos ;
5237+ }
5238+
52385239static int
52395240unicode_decode_utf8_impl (_PyUnicodeWriter * writer ,
52405241 const char * starts , const char * s , const char * end ,
0 commit comments