@@ -1232,26 +1232,31 @@ fn contains_nonascii(x: usize) -> bool {
1232
1232
/// invalid sequence.
1233
1233
#[ inline( always) ]
1234
1234
fn run_utf8_validation ( v : & [ u8 ] ) -> Result < ( ) , Utf8Error > {
1235
- let mut offset = 0 ;
1235
+ let mut index = 0 ;
1236
1236
let len = v. len ( ) ;
1237
- while offset < len {
1238
- let old_offset = offset;
1237
+
1238
+ let usize_bytes = mem:: size_of :: < usize > ( ) ;
1239
+ let ascii_block_size = 2 * usize_bytes;
1240
+ let blocks_end = if len >= ascii_block_size { len - ascii_block_size + 1 } else { 0 } ;
1241
+
1242
+ while index < len {
1243
+ let old_offset = index;
1239
1244
macro_rules! err { ( ) => { {
1240
1245
return Err ( Utf8Error {
1241
1246
valid_up_to: old_offset
1242
1247
} )
1243
1248
} } }
1244
1249
1245
1250
macro_rules! next { ( ) => { {
1246
- offset += 1 ;
1251
+ index += 1 ;
1247
1252
// we needed data, but there was none: error!
1248
- if offset >= len {
1253
+ if index >= len {
1249
1254
err!( )
1250
1255
}
1251
- v[ offset ]
1256
+ v[ index ]
1252
1257
} } }
1253
1258
1254
- let first = v[ offset ] ;
1259
+ let first = v[ index ] ;
1255
1260
if first >= 128 {
1256
1261
let w = UTF8_CHAR_WIDTH [ first as usize ] ;
1257
1262
let second = next ! ( ) ;
@@ -1294,38 +1299,32 @@ fn run_utf8_validation(v: &[u8]) -> Result<(), Utf8Error> {
1294
1299
}
1295
1300
_ => err ! ( )
1296
1301
}
1297
- offset += 1 ;
1302
+ index += 1 ;
1298
1303
} else {
1299
1304
// ASCII case: try to skip forward quickly.
1300
1305
// When the pointer is aligned, read 2 words of data per iteration
1301
1306
// until we find a word containing a non-ASCII byte.
1302
- let usize_bytes = mem:: size_of :: < usize > ( ) ;
1303
- let bytes_per_iteration = 2 * usize_bytes;
1304
1307
let ptr = v. as_ptr ( ) ;
1305
- let align = ( ptr as usize + offset ) & ( usize_bytes - 1 ) ;
1308
+ let align = ( ptr as usize + index ) & ( usize_bytes - 1 ) ;
1306
1309
if align == 0 {
1307
- if len >= bytes_per_iteration {
1308
- while offset <= len - bytes_per_iteration {
1309
- unsafe {
1310
- let u = * ( ptr. offset ( offset as isize ) as * const usize ) ;
1311
- let v = * ( ptr. offset ( ( offset + usize_bytes) as isize ) as * const usize ) ;
1312
-
1313
- // break if there is a nonascii byte
1314
- let zu = contains_nonascii ( u) ;
1315
- let zv = contains_nonascii ( v) ;
1316
- if zu || zv {
1317
- break ;
1318
- }
1310
+ while index < blocks_end {
1311
+ unsafe {
1312
+ let block = ptr. offset ( index as isize ) as * const usize ;
1313
+ // break if there is a nonascii byte
1314
+ let zu = contains_nonascii ( * block) ;
1315
+ let zv = contains_nonascii ( * block. offset ( 1 ) ) ;
1316
+ if zu | zv {
1317
+ break ;
1319
1318
}
1320
- offset += bytes_per_iteration;
1321
1319
}
1320
+ index += ascii_block_size;
1322
1321
}
1323
1322
// step from the point where the wordwise loop stopped
1324
- while offset < len && v[ offset ] < 128 {
1325
- offset += 1 ;
1323
+ while index < len && v[ index ] < 128 {
1324
+ index += 1 ;
1326
1325
}
1327
1326
} else {
1328
- offset += 1 ;
1327
+ index += 1 ;
1329
1328
}
1330
1329
}
1331
1330
}
0 commit comments