@@ -95,7 +95,7 @@ pub enum TokenKind {
9595 Literal { kind : LiteralKind , suffix_start : u32 } ,
9696
9797 /// "'a"
98- Lifetime { starts_with_number : bool } ,
98+ Lifetime { starts_with_number : bool , contains_emoji : bool } ,
9999
100100 // One-char tokens:
101101 /// ";"
@@ -630,7 +630,13 @@ impl Cursor<'_> {
630630 // If the first symbol is valid for identifier, it can be a lifetime.
631631 // Also check if it's a number for better error reporting (so '0 will
632632 // be reported as an invalid lifetime and not as an unterminated char literal).
633- is_id_start ( self . first ( ) ) || self . first ( ) . is_digit ( 10 )
633+ // We also have to account for potential `'🐱` emojis to avoid reporting
634+ // it as an unterminated char literal.
635+ is_id_start ( self . first ( ) )
636+ || self . first ( ) . is_digit ( 10 )
637+ // FIXME(#108019): `unic-emoji-char` seems to have data tables only up to Unicode
638+ // 5.0, but Unicode is already newer than this.
639+ || unic_emoji_char:: is_emoji ( self . first ( ) )
634640 } ;
635641
636642 if !can_be_a_lifetime {
@@ -643,16 +649,33 @@ impl Cursor<'_> {
643649 return Literal { kind, suffix_start } ;
644650 }
645651
646- // Either a lifetime or a character literal with
647- // length greater than 1.
652+ // Either a lifetime or a character literal.
648653
649654 let starts_with_number = self . first ( ) . is_digit ( 10 ) ;
655+ let mut contains_emoji = false ;
650656
651- // Skip the literal contents.
652- // First symbol can be a number (which isn't a valid identifier start),
653- // so skip it without any checks.
654- self . bump ( ) ;
655- self . eat_while ( is_id_continue) ;
657+ // FIXME(#108019): `unic-emoji-char` seems to have data tables only up to Unicode
658+ // 5.0, but Unicode is already newer than this.
659+ if unic_emoji_char:: is_emoji ( self . first ( ) ) {
660+ contains_emoji = true ;
661+ } else {
662+ // Skip the literal contents.
663+ // First symbol can be a number (which isn't a valid identifier start),
664+ // so skip it without any checks.
665+ self . bump ( ) ;
666+ }
667+ self . eat_while ( |c| {
668+ if is_id_continue ( c) {
669+ true
670+ // FIXME(#108019): `unic-emoji-char` seems to have data tables only up to Unicode
671+ // 5.0, but Unicode is already newer than this.
672+ } else if unic_emoji_char:: is_emoji ( c) {
673+ contains_emoji = true ;
674+ true
675+ } else {
676+ false
677+ }
678+ } ) ;
656679
657680 // Check if after skipping literal contents we've met a closing
658681 // single quote (which means that user attempted to create a
@@ -662,7 +685,7 @@ impl Cursor<'_> {
662685 let kind = Char { terminated : true } ;
663686 Literal { kind, suffix_start : self . pos_within_token ( ) }
664687 } else {
665- Lifetime { starts_with_number }
688+ Lifetime { starts_with_number, contains_emoji }
666689 }
667690 }
668691
0 commit comments