@@ -178,6 +178,8 @@ pub struct GraphemeCursor {
178
178
// Set if a call to `prev_boundary` or `next_boundary` was suspended due
179
179
// to needing more input.
180
180
resuming : bool ,
181
+ // Cached grapheme category and associated scalar value range.
182
+ grapheme_cat_cache : ( u32 , u32 , GraphemeCat ) ,
181
183
}
182
184
183
185
/// An error return indicating that not enough content was available in the
@@ -276,9 +278,20 @@ impl GraphemeCursor {
276
278
pre_context_offset : None ,
277
279
ris_count : None ,
278
280
resuming : false ,
281
+ grapheme_cat_cache : ( 0 , 0 , GraphemeCat :: GC_Control ) ,
279
282
}
280
283
}
281
284
285
+ fn grapheme_category ( & mut self , ch : char ) -> GraphemeCat {
286
+ use tables:: grapheme as gr;
287
+ // If this char isn't within the cached range, update the cache to the
288
+ // range that includes it.
289
+ if ( ch as u32 ) < self . grapheme_cat_cache . 0 || ( ch as u32 ) > self . grapheme_cat_cache . 1 {
290
+ self . grapheme_cat_cache = gr:: grapheme_category ( ch) ;
291
+ }
292
+ self . grapheme_cat_cache . 2
293
+ }
294
+
282
295
// Not sure I'm gonna keep this, the advantage over new() seems thin.
283
296
284
297
/// Set the cursor to a new location in the same string.
@@ -349,7 +362,7 @@ impl GraphemeCursor {
349
362
self . pre_context_offset = None ;
350
363
if self . is_extended && chunk_start + chunk. len ( ) == self . offset {
351
364
let ch = chunk. chars ( ) . rev ( ) . next ( ) . unwrap ( ) ;
352
- if gr :: grapheme_category ( ch) == gr:: GC_Prepend {
365
+ if self . grapheme_category ( ch) == gr:: GC_Prepend {
353
366
self . decide ( false ) ; // GB9b
354
367
return ;
355
368
}
@@ -359,7 +372,7 @@ impl GraphemeCursor {
359
372
GraphemeState :: Emoji => self . handle_emoji ( chunk, chunk_start) ,
360
373
_ => if self . cat_before . is_none ( ) && self . offset == chunk. len ( ) + chunk_start {
361
374
let ch = chunk. chars ( ) . rev ( ) . next ( ) . unwrap ( ) ;
362
- self . cat_before = Some ( gr :: grapheme_category ( ch) ) ;
375
+ self . cat_before = Some ( self . grapheme_category ( ch) ) ;
363
376
} ,
364
377
}
365
378
}
@@ -393,7 +406,7 @@ impl GraphemeCursor {
393
406
use tables:: grapheme as gr;
394
407
let mut ris_count = self . ris_count . unwrap_or ( 0 ) ;
395
408
for ch in chunk. chars ( ) . rev ( ) {
396
- if gr :: grapheme_category ( ch) != gr:: GC_Regional_Indicator {
409
+ if self . grapheme_category ( ch) != gr:: GC_Regional_Indicator {
397
410
self . ris_count = Some ( ris_count) ;
398
411
self . decide ( ( ris_count % 2 ) == 0 ) ;
399
412
return ;
@@ -413,13 +426,13 @@ impl GraphemeCursor {
413
426
use tables:: grapheme as gr;
414
427
let mut iter = chunk. chars ( ) . rev ( ) ;
415
428
if let Some ( ch) = iter. next ( ) {
416
- if gr :: grapheme_category ( ch) != gr:: GC_ZWJ {
429
+ if self . grapheme_category ( ch) != gr:: GC_ZWJ {
417
430
self . decide ( true ) ;
418
431
return ;
419
432
}
420
433
}
421
434
for ch in iter {
422
- match gr :: grapheme_category ( ch) {
435
+ match self . grapheme_category ( ch) {
423
436
gr:: GC_Extend => ( ) ,
424
437
gr:: GC_Extended_Pictographic => {
425
438
self . decide ( false ) ;
@@ -481,7 +494,7 @@ impl GraphemeCursor {
481
494
let offset_in_chunk = self . offset - chunk_start;
482
495
if self . cat_after . is_none ( ) {
483
496
let ch = chunk[ offset_in_chunk..] . chars ( ) . next ( ) . unwrap ( ) ;
484
- self . cat_after = Some ( gr :: grapheme_category ( ch) ) ;
497
+ self . cat_after = Some ( self . grapheme_category ( ch) ) ;
485
498
}
486
499
if self . offset == chunk_start {
487
500
let mut need_pre_context = true ;
@@ -497,7 +510,7 @@ impl GraphemeCursor {
497
510
}
498
511
if self . cat_before . is_none ( ) {
499
512
let ch = chunk[ ..offset_in_chunk] . chars ( ) . rev ( ) . next ( ) . unwrap ( ) ;
500
- self . cat_before = Some ( gr :: grapheme_category ( ch) ) ;
513
+ self . cat_before = Some ( self . grapheme_category ( ch) ) ;
501
514
}
502
515
match check_pair ( self . cat_before . unwrap ( ) , self . cat_after . unwrap ( ) ) {
503
516
PairResult :: NotBreak => return self . decision ( false ) ,
@@ -553,7 +566,6 @@ impl GraphemeCursor {
553
566
/// assert_eq!(cursor.next_boundary(&s[2..4], 2), Ok(None));
554
567
/// ```
555
568
pub fn next_boundary ( & mut self , chunk : & str , chunk_start : usize ) -> Result < Option < usize > , GraphemeIncomplete > {
556
- use tables:: grapheme as gr;
557
569
if self . offset == self . len {
558
570
return Ok ( None ) ;
559
571
}
@@ -562,14 +574,14 @@ impl GraphemeCursor {
562
574
loop {
563
575
if self . resuming {
564
576
if self . cat_after . is_none ( ) {
565
- self . cat_after = Some ( gr :: grapheme_category ( ch) ) ;
577
+ self . cat_after = Some ( self . grapheme_category ( ch) ) ;
566
578
}
567
579
} else {
568
580
self . offset += ch. len_utf8 ( ) ;
569
581
self . state = GraphemeState :: Unknown ;
570
582
self . cat_before = self . cat_after . take ( ) ;
571
583
if self . cat_before . is_none ( ) {
572
- self . cat_before = Some ( gr :: grapheme_category ( ch) ) ;
584
+ self . cat_before = Some ( self . grapheme_category ( ch) ) ;
573
585
}
574
586
if self . cat_before . unwrap ( ) == GraphemeCat :: GC_Regional_Indicator {
575
587
self . ris_count = self . ris_count . map ( |c| c + 1 ) ;
@@ -578,7 +590,7 @@ impl GraphemeCursor {
578
590
}
579
591
if let Some ( next_ch) = iter. next ( ) {
580
592
ch = next_ch;
581
- self . cat_after = Some ( gr :: grapheme_category ( ch) ) ;
593
+ self . cat_after = Some ( self . grapheme_category ( ch) ) ;
582
594
} else if self . offset == self . len {
583
595
self . decide ( true ) ;
584
596
} else {
@@ -629,7 +641,6 @@ impl GraphemeCursor {
629
641
/// assert_eq!(cursor.prev_boundary(&s[0..2], 0), Ok(None));
630
642
/// ```
631
643
pub fn prev_boundary ( & mut self , chunk : & str , chunk_start : usize ) -> Result < Option < usize > , GraphemeIncomplete > {
632
- use tables:: grapheme as gr;
633
644
if self . offset == 0 {
634
645
return Ok ( None ) ;
635
646
}
@@ -644,7 +655,7 @@ impl GraphemeCursor {
644
655
return Err ( GraphemeIncomplete :: PrevChunk ) ;
645
656
}
646
657
if self . resuming {
647
- self . cat_before = Some ( gr :: grapheme_category ( ch) ) ;
658
+ self . cat_before = Some ( self . grapheme_category ( ch) ) ;
648
659
} else {
649
660
self . offset -= ch. len_utf8 ( ) ;
650
661
self . cat_after = self . cat_before . take ( ) ;
@@ -654,12 +665,12 @@ impl GraphemeCursor {
654
665
}
655
666
if let Some ( prev_ch) = iter. next ( ) {
656
667
ch = prev_ch;
657
- self . cat_before = Some ( gr :: grapheme_category ( ch) ) ;
668
+ self . cat_before = Some ( self . grapheme_category ( ch) ) ;
658
669
} else if self . offset == 0 {
659
670
self . decide ( true ) ;
660
671
} else {
661
672
self . resuming = true ;
662
- self . cat_after = Some ( gr :: grapheme_category ( ch) ) ;
673
+ self . cat_after = Some ( self . grapheme_category ( ch) ) ;
663
674
return Err ( GraphemeIncomplete :: PrevChunk ) ;
664
675
}
665
676
}
0 commit comments