Skip to content

Commit 380fa26

Browse files
committed
Don't recover lifetimes/labels containing emojis as character literals
Note that, at the time of this commit, the data tables in `unic-emoji-char` appear to cover only up to Unicode 5.0, while the Unicode Standard has since moved past that version. As a result, a newer emoji such as `🥺` is not recognized as an emoji, whereas older emojis such as `🐱` are.
1 parent c3c6d73 commit 380fa26

File tree

5 files changed

+173
-12
lines changed

5 files changed

+173
-12
lines changed

Diff for: compiler/rustc_errors/src/lib.rs

+2
Original file line numberDiff line numberDiff line change
@@ -471,6 +471,8 @@ pub enum StashKey {
471471
/// When an invalid lifetime e.g. `'2` should be reinterpreted
472472
/// as a char literal in the parser
473473
LifetimeIsChar,
474+
/// When an invalid lifetime e.g. `'🐱` contains emoji.
475+
LifetimeContainsEmoji,
474476
/// Maybe there was a typo where a comma was forgotten before
475477
/// FRU syntax
476478
MaybeFruTypo,

Diff for: compiler/rustc_lexer/src/lib.rs

+33-10
Original file line numberDiff line numberDiff line change
@@ -95,7 +95,7 @@ pub enum TokenKind {
9595
Literal { kind: LiteralKind, suffix_start: u32 },
9696

9797
/// "'a"
98-
Lifetime { starts_with_number: bool },
98+
Lifetime { starts_with_number: bool, contains_emoji: bool },
9999

100100
// One-char tokens:
101101
/// ";"
@@ -630,7 +630,13 @@ impl Cursor<'_> {
630630
// If the first symbol is valid for identifier, it can be a lifetime.
631631
// Also check if it's a number for a better error reporting (so '0 will
632632
// be reported as invalid lifetime and not as unterminated char literal).
633-
is_id_start(self.first()) || self.first().is_digit(10)
633+
// We also have to account for potential `'🐱` emojis to avoid reporting
634+
// it as an unterminated char literal.
635+
is_id_start(self.first())
636+
|| self.first().is_digit(10)
637+
// FIXME(#108019): `unic-emoji-char` seems to have data tables only up to Unicode
638+
// 5.0, but Unicode is already newer than this.
639+
|| unic_emoji_char::is_emoji(self.first())
634640
};
635641

636642
if !can_be_a_lifetime {
@@ -643,16 +649,33 @@ impl Cursor<'_> {
643649
return Literal { kind, suffix_start };
644650
}
645651

646-
// Either a lifetime or a character literal with
647-
// length greater than 1.
652+
// Either a lifetime or a character literal.
648653

649654
let starts_with_number = self.first().is_digit(10);
655+
let mut contains_emoji = false;
650656

651-
// Skip the literal contents.
652-
// First symbol can be a number (which isn't a valid identifier start),
653-
// so skip it without any checks.
654-
self.bump();
655-
self.eat_while(is_id_continue);
657+
// FIXME(#108019): `unic-emoji-char` seems to have data tables only up to Unicode
658+
// 5.0, but Unicode is already newer than this.
659+
if unic_emoji_char::is_emoji(self.first()) {
660+
contains_emoji = true;
661+
} else {
662+
// Skip the literal contents.
663+
// First symbol can be a number (which isn't a valid identifier start),
664+
// so skip it without any checks.
665+
self.bump();
666+
}
667+
self.eat_while(|c| {
668+
if is_id_continue(c) {
669+
true
670+
// FIXME(#108019): `unic-emoji-char` seems to have data tables only up to Unicode
671+
// 5.0, but Unicode is already newer than this.
672+
} else if unic_emoji_char::is_emoji(c) {
673+
contains_emoji = true;
674+
true
675+
} else {
676+
false
677+
}
678+
});
656679

657680
// Check if after skipping literal contents we've met a closing
658681
// single quote (which means that user attempted to create a
@@ -662,7 +685,7 @@ impl Cursor<'_> {
662685
let kind = Char { terminated: true };
663686
Literal { kind, suffix_start: self.pos_within_token() }
664687
} else {
665-
Lifetime { starts_with_number }
688+
Lifetime { starts_with_number, contains_emoji }
666689
}
667690
}
668691

Diff for: compiler/rustc_parse/src/lexer/mod.rs

+7-2
Original file line numberDiff line numberDiff line change
@@ -200,16 +200,21 @@ impl<'a> StringReader<'a> {
200200
};
201201
token::Literal(token::Lit { kind, symbol, suffix })
202202
}
203-
rustc_lexer::TokenKind::Lifetime { starts_with_number } => {
203+
rustc_lexer::TokenKind::Lifetime { starts_with_number, contains_emoji } => {
204204
// Include the leading `'` in the real identifier, for macro
205205
// expansion purposes. See #12512 for the gory details of why
206206
// this is necessary.
207207
let lifetime_name = self.str_from(start);
208208
if starts_with_number {
209209
let span = self.mk_sp(start, self.pos);
210-
let mut diag = self.sess.struct_err("lifetimes cannot start with a number");
210+
let mut diag = self.sess.struct_err("lifetimes or labels cannot start with a number");
211211
diag.set_span(span);
212212
diag.stash(span, StashKey::LifetimeIsChar);
213+
} else if contains_emoji {
214+
let span = self.mk_sp(start, self.pos);
215+
let mut diag = self.sess.struct_err("lifetimes or labels cannot contain emojis");
216+
diag.set_span(span);
217+
diag.stash(span, StashKey::LifetimeContainsEmoji);
213218
}
214219
let ident = Symbol::intern(lifetime_name);
215220
token::Lifetime(ident)

Diff for: tests/ui/lexer/issue-108019-bad-emoji-recovery.rs

+45
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
#![allow(unused_labels)]
2+
3+
// FIXME(#108019): outdated Unicode table
4+
// fn foo() {
5+
// '🥺 loop {
6+
// break
7+
// }
8+
// }
9+
10+
fn bar() {
11+
'🐱 loop {
12+
//~^ ERROR labeled expression must be followed by `:`
13+
//~| ERROR lifetimes or labels cannot contain emojis
14+
break
15+
}
16+
}
17+
18+
fn qux() {
19+
'a🐱 loop {
20+
//~^ ERROR labeled expression must be followed by `:`
21+
//~| ERROR lifetimes or labels cannot contain emojis
22+
break
23+
}
24+
}
25+
26+
fn quux() {
27+
'1🐱 loop {
28+
//~^ ERROR labeled expression must be followed by `:`
29+
//~| ERROR lifetimes or labels cannot start with a number
30+
break
31+
}
32+
}
33+
34+
fn x<'🐱>() -> &'🐱 () {
35+
//~^ ERROR lifetimes or labels cannot contain emojis
36+
//~| ERROR lifetimes or labels cannot contain emojis
37+
&()
38+
}
39+
40+
fn y() {
41+
'a🐱: loop {}
42+
//~^ ERROR lifetimes or labels cannot contain emojis
43+
}
44+
45+
fn main() {}

Diff for: tests/ui/lexer/issue-108019-bad-emoji-recovery.stderr

+86
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,86 @@
1+
error: labeled expression must be followed by `:`
2+
--> $DIR/issue-108019-bad-emoji-recovery.rs:11:5
3+
|
4+
LL | '🐱 loop {
5+
| ^--- help: add `:` after the label
6+
| |
7+
| _____the label
8+
| |
9+
LL | |
10+
LL | |
11+
LL | | break
12+
LL | | }
13+
| |_____^
14+
|
15+
= note: labels are used before loops and blocks, allowing e.g., `break 'label` to them
16+
17+
error: labeled expression must be followed by `:`
18+
--> $DIR/issue-108019-bad-emoji-recovery.rs:19:5
19+
|
20+
LL | 'a🐱 loop {
21+
| ^---- help: add `:` after the label
22+
| |
23+
| _____the label
24+
| |
25+
LL | |
26+
LL | |
27+
LL | | break
28+
LL | | }
29+
| |_____^
30+
|
31+
= note: labels are used before loops and blocks, allowing e.g., `break 'label` to them
32+
33+
error: labeled expression must be followed by `:`
34+
--> $DIR/issue-108019-bad-emoji-recovery.rs:27:5
35+
|
36+
LL | '1🐱 loop {
37+
| ^---- help: add `:` after the label
38+
| |
39+
| _____the label
40+
| |
41+
LL | |
42+
LL | |
43+
LL | | break
44+
LL | | }
45+
| |_____^
46+
|
47+
= note: labels are used before loops and blocks, allowing e.g., `break 'label` to them
48+
49+
error: lifetimes or labels cannot contain emojis
50+
--> $DIR/issue-108019-bad-emoji-recovery.rs:11:5
51+
|
52+
LL | '🐱 loop {
53+
| ^^^
54+
55+
error: lifetimes or labels cannot contain emojis
56+
--> $DIR/issue-108019-bad-emoji-recovery.rs:19:5
57+
|
58+
LL | 'a🐱 loop {
59+
| ^^^^
60+
61+
error: lifetimes or labels cannot start with a number
62+
--> $DIR/issue-108019-bad-emoji-recovery.rs:27:5
63+
|
64+
LL | '1🐱 loop {
65+
| ^^^^
66+
67+
error: lifetimes or labels cannot contain emojis
68+
--> $DIR/issue-108019-bad-emoji-recovery.rs:34:6
69+
|
70+
LL | fn x<'🐱>() -> &'🐱 () {
71+
| ^^^
72+
73+
error: lifetimes or labels cannot contain emojis
74+
--> $DIR/issue-108019-bad-emoji-recovery.rs:34:16
75+
|
76+
LL | fn x<'🐱>() -> &'🐱 () {
77+
| ^^^
78+
79+
error: lifetimes or labels cannot contain emojis
80+
--> $DIR/issue-108019-bad-emoji-recovery.rs:41:5
81+
|
82+
LL | 'a🐱: loop {}
83+
| ^^^^
84+
85+
error: aborting due to 9 previous errors
86+

0 commit comments

Comments
 (0)