40
40
41
41
use crate :: cmp;
42
42
use crate :: cmp:: Ordering ;
43
+ use crate :: convert:: TryInto as _;
43
44
use crate :: fmt;
44
45
use crate :: slice:: memchr;
45
46
@@ -370,11 +371,17 @@ pub struct CharSearcher<'a> {
370
371
371
372
// safety invariant: `utf8_size` must be less than 5
372
373
/// The number of bytes `needle` takes up when encoded in utf8.
373
- utf8_size : usize ,
374
+ utf8_size : u8 ,
374
375
/// A utf8 encoded copy of the `needle`
375
376
utf8_encoded : [ u8 ; 4 ] ,
376
377
}
377
378
379
+ impl CharSearcher < ' _ > {
380
+ fn utf8_size ( & self ) -> usize {
381
+ self . utf8_size . into ( )
382
+ }
383
+ }
384
+
378
385
unsafe impl < ' a > Searcher < ' a > for CharSearcher < ' a > {
379
386
#[ inline]
380
387
fn haystack ( & self ) -> & ' a str {
@@ -414,7 +421,7 @@ unsafe impl<'a> Searcher<'a> for CharSearcher<'a> {
414
421
let bytes = self . haystack . as_bytes ( ) . get ( self . finger ..self . finger_back ) ?;
415
422
// the last byte of the utf8 encoded needle
416
423
// SAFETY: we have an invariant that `utf8_size < 5`
417
- let last_byte = unsafe { * self . utf8_encoded . get_unchecked ( self . utf8_size - 1 ) } ;
424
+ let last_byte = unsafe { * self . utf8_encoded . get_unchecked ( self . utf8_size ( ) - 1 ) } ;
418
425
if let Some ( index) = memchr:: memchr ( last_byte, bytes) {
419
426
// The new finger is the index of the byte we found,
420
427
// plus one, since we memchr'd for the last byte of the character.
@@ -434,10 +441,10 @@ unsafe impl<'a> Searcher<'a> for CharSearcher<'a> {
434
441
// find something. When we find something the `finger` will be set
435
442
// to a UTF8 boundary.
436
443
self . finger += index + 1 ;
437
- if self . finger >= self . utf8_size {
438
- let found_char = self . finger - self . utf8_size ;
444
+ if self . finger >= self . utf8_size ( ) {
445
+ let found_char = self . finger - self . utf8_size ( ) ;
439
446
if let Some ( slice) = self . haystack . as_bytes ( ) . get ( found_char..self . finger ) {
440
- if slice == & self . utf8_encoded [ 0 ..self . utf8_size ] {
447
+ if slice == & self . utf8_encoded [ 0 ..self . utf8_size ( ) ] {
441
448
return Some ( ( found_char, self . finger ) ) ;
442
449
}
443
450
}
@@ -482,7 +489,7 @@ unsafe impl<'a> ReverseSearcher<'a> for CharSearcher<'a> {
482
489
let bytes = haystack. get ( self . finger ..self . finger_back ) ?;
483
490
// the last byte of the utf8 encoded needle
484
491
// SAFETY: we have an invariant that `utf8_size < 5`
485
- let last_byte = unsafe { * self . utf8_encoded . get_unchecked ( self . utf8_size - 1 ) } ;
492
+ let last_byte = unsafe { * self . utf8_encoded . get_unchecked ( self . utf8_size ( ) - 1 ) } ;
486
493
if let Some ( index) = memchr:: memrchr ( last_byte, bytes) {
487
494
// we searched a slice that was offset by self.finger,
488
495
// add self.finger to recoup the original index
@@ -493,14 +500,14 @@ unsafe impl<'a> ReverseSearcher<'a> for CharSearcher<'a> {
493
500
// char in the paradigm of reverse iteration). For
494
501
// multibyte chars we need to skip down by the number of more
495
502
// bytes they have than ASCII
496
- let shift = self . utf8_size - 1 ;
503
+ let shift = self . utf8_size ( ) - 1 ;
497
504
if index >= shift {
498
505
let found_char = index - shift;
499
- if let Some ( slice) = haystack. get ( found_char..( found_char + self . utf8_size ) ) {
500
- if slice == & self . utf8_encoded [ 0 ..self . utf8_size ] {
506
+ if let Some ( slice) = haystack. get ( found_char..( found_char + self . utf8_size ( ) ) ) {
507
+ if slice == & self . utf8_encoded [ 0 ..self . utf8_size ( ) ] {
501
508
// move finger to before the character found (i.e., at its start index)
502
509
self . finger_back = found_char;
503
- return Some ( ( self . finger_back , self . finger_back + self . utf8_size ) ) ;
510
+ return Some ( ( self . finger_back , self . finger_back + self . utf8_size ( ) ) ) ;
504
511
}
505
512
}
506
513
}
@@ -542,7 +549,12 @@ impl<'a> Pattern<'a> for char {
542
549
#[ inline]
543
550
fn into_searcher ( self , haystack : & ' a str ) -> Self :: Searcher {
544
551
let mut utf8_encoded = [ 0 ; 4 ] ;
545
- let utf8_size = self . encode_utf8 ( & mut utf8_encoded) . len ( ) ;
552
+ let utf8_size = self
553
+ . encode_utf8 ( & mut utf8_encoded)
554
+ . len ( )
555
+ . try_into ( )
556
+ . expect ( "char len should be less than 255" ) ;
557
+
546
558
CharSearcher {
547
559
haystack,
548
560
finger : 0 ,
0 commit comments