diff --git a/compiler/rustc_errors/src/lib.rs b/compiler/rustc_errors/src/lib.rs
index 83b733d4c0677..b310b191d5273 100644
--- a/compiler/rustc_errors/src/lib.rs
+++ b/compiler/rustc_errors/src/lib.rs
@@ -471,6 +471,8 @@ pub enum StashKey {
     /// When an invalid lifetime e.g. `'2` should be reinterpreted
     /// as a char literal in the parser
     LifetimeIsChar,
+    /// When an invalid lifetime e.g. `'🐱` contains emoji.
+    LifetimeContainsEmoji,
     /// Maybe there was a typo where a comma was forgotten before
     /// FRU syntax
     MaybeFruTypo,
diff --git a/compiler/rustc_lexer/src/lib.rs b/compiler/rustc_lexer/src/lib.rs
index 6e815863d06ff..e6f04fe0aaa63 100644
--- a/compiler/rustc_lexer/src/lib.rs
+++ b/compiler/rustc_lexer/src/lib.rs
@@ -95,7 +95,7 @@ pub enum TokenKind {
     Literal { kind: LiteralKind, suffix_start: u32 },
 
     /// "'a"
-    Lifetime { starts_with_number: bool },
+    Lifetime { starts_with_number: bool, contains_emoji: bool },
 
     // One-char tokens:
     /// ";"
@@ -630,7 +630,13 @@ impl Cursor<'_> {
             // If the first symbol is valid for identifier, it can be a lifetime.
             // Also check if it's a number for a better error reporting (so '0 will
             // be reported as invalid lifetime and not as unterminated char literal).
-            is_id_start(self.first()) || self.first().is_digit(10)
+            // We also have to account for potential `'🐱` emojis to avoid reporting
+            // it as an unterminated char literal.
+            is_id_start(self.first())
+                || self.first().is_digit(10)
+                // FIXME(#108019): `unic-emoji-char` seems to have data tables only up to Unicode
+                // 5.0, but Unicode is already newer than this.
+                || unic_emoji_char::is_emoji(self.first())
         };
 
         if !can_be_a_lifetime {
@@ -643,16 +649,33 @@ impl Cursor<'_> {
             return Literal { kind, suffix_start };
         }
 
-        // Either a lifetime or a character literal with
-        // length greater than 1.
+        // Either a lifetime or a character literal.
 
         let starts_with_number = self.first().is_digit(10);
+        let mut contains_emoji = false;
 
-        // Skip the literal contents.
-        // First symbol can be a number (which isn't a valid identifier start),
-        // so skip it without any checks.
-        self.bump();
-        self.eat_while(is_id_continue);
+        // FIXME(#108019): `unic-emoji-char` seems to have data tables only up to Unicode
+        // 5.0, but Unicode is already newer than this.
+        if unic_emoji_char::is_emoji(self.first()) {
+            contains_emoji = true;
+        } else {
+            // Skip the literal contents.
+            // First symbol can be a number (which isn't a valid identifier start),
+            // so skip it without any checks.
+            self.bump();
+        }
+        self.eat_while(|c| {
+            if is_id_continue(c) {
+                true
+            // FIXME(#108019): `unic-emoji-char` seems to have data tables only up to Unicode
+            // 5.0, but Unicode is already newer than this.
+            } else if unic_emoji_char::is_emoji(c) {
+                contains_emoji = true;
+                true
+            } else {
+                false
+            }
+        });
 
         // Check if after skipping literal contents we've met a closing
         // single quote (which means that user attempted to create a
@@ -662,7 +685,7 @@ impl Cursor<'_> {
             let kind = Char { terminated: true };
             Literal { kind, suffix_start: self.pos_within_token() }
         } else {
-            Lifetime { starts_with_number }
+            Lifetime { starts_with_number, contains_emoji }
         }
     }
 
diff --git a/compiler/rustc_parse/src/lexer/mod.rs b/compiler/rustc_parse/src/lexer/mod.rs
index bd998ed91d977..37449aaabed8b 100644
--- a/compiler/rustc_parse/src/lexer/mod.rs
+++ b/compiler/rustc_parse/src/lexer/mod.rs
@@ -200,16 +200,21 @@ impl<'a> StringReader<'a> {
                 };
                 token::Literal(token::Lit { kind, symbol, suffix })
             }
-            rustc_lexer::TokenKind::Lifetime { starts_with_number } => {
+            rustc_lexer::TokenKind::Lifetime { starts_with_number, contains_emoji } => {
                 // Include the leading `'` in the real identifier, for macro
                 // expansion purposes. See #12512 for the gory details of why
                 // this is necessary.
                 let lifetime_name = self.str_from(start);
                 if starts_with_number {
                     let span = self.mk_sp(start, self.pos);
-                    let mut diag = self.sess.struct_err("lifetimes cannot start with a number");
+                    let mut diag = self.sess.struct_err("lifetimes or labels cannot start with a number");
                     diag.set_span(span);
                     diag.stash(span, StashKey::LifetimeIsChar);
+                } else if contains_emoji {
+                    let span = self.mk_sp(start, self.pos);
+                    let mut diag = self.sess.struct_err("lifetimes or labels cannot contain emojis");
+                    diag.set_span(span);
+                    diag.stash(span, StashKey::LifetimeContainsEmoji);
                 }
                 let ident = Symbol::intern(lifetime_name);
                 token::Lifetime(ident)
diff --git a/tests/ui/lexer/issue-108019-bad-emoji-recovery.rs b/tests/ui/lexer/issue-108019-bad-emoji-recovery.rs
new file mode 100644
index 0000000000000..f0f8622456010
--- /dev/null
+++ b/tests/ui/lexer/issue-108019-bad-emoji-recovery.rs
@@ -0,0 +1,45 @@
+#![allow(unused_labels)]
+
+// FIXME(#108019): outdated Unicode table
+// fn foo() {
+//     '🥺 loop {
+//         break
+//     }
+// }
+
+fn bar() {
+    '🐱 loop {
+        //~^ ERROR labeled expression must be followed by `:`
+        //~| ERROR lifetimes or labels cannot contain emojis
+        break
+    }
+}
+
+fn qux() {
+    'a🐱 loop {
+        //~^ ERROR labeled expression must be followed by `:`
+        //~| ERROR lifetimes or labels cannot contain emojis
+        break
+    }
+}
+
+fn quux() {
+    '1🐱 loop {
+        //~^ ERROR labeled expression must be followed by `:`
+        //~| ERROR lifetimes or labels cannot start with a number
+        break
+    }
+}
+
+fn x<'🐱>() -> &'🐱 () {
+    //~^ ERROR lifetimes or labels cannot contain emojis
+    //~| ERROR lifetimes or labels cannot contain emojis
+    &()
+}
+
+fn y() {
+    'a🐱: loop {}
+    //~^ ERROR lifetimes or labels cannot contain emojis
+}
+
+fn main() {}
diff --git a/tests/ui/lexer/issue-108019-bad-emoji-recovery.stderr b/tests/ui/lexer/issue-108019-bad-emoji-recovery.stderr
new file mode 100644
index 0000000000000..be77ffdea349f
--- /dev/null
+++ b/tests/ui/lexer/issue-108019-bad-emoji-recovery.stderr
@@ -0,0 +1,86 @@
+error: labeled expression must be followed by `:`
+  --> $DIR/issue-108019-bad-emoji-recovery.rs:11:5
+   |
+LL |       '🐱 loop {
+   |       ^--- help: add `:` after the label
+   |       |
+   |  _____the label
+   | |
+LL | |
+LL | |
+LL | |         break
+LL | |     }
+   | |_____^
+   |
+   = note: labels are used before loops and blocks, allowing e.g., `break 'label` to them
+
+error: labeled expression must be followed by `:`
+  --> $DIR/issue-108019-bad-emoji-recovery.rs:19:5
+   |
+LL |       'a🐱 loop {
+   |       ^---- help: add `:` after the label
+   |       |
+   |  _____the label
+   | |
+LL | |
+LL | |
+LL | |         break
+LL | |     }
+   | |_____^
+   |
+   = note: labels are used before loops and blocks, allowing e.g., `break 'label` to them
+
+error: labeled expression must be followed by `:`
+  --> $DIR/issue-108019-bad-emoji-recovery.rs:27:5
+   |
+LL |       '1🐱 loop {
+   |       ^---- help: add `:` after the label
+   |       |
+   |  _____the label
+   | |
+LL | |
+LL | |
+LL | |         break
+LL | |     }
+   | |_____^
+   |
+   = note: labels are used before loops and blocks, allowing e.g., `break 'label` to them
+
+error: lifetimes or labels cannot contain emojis
+  --> $DIR/issue-108019-bad-emoji-recovery.rs:11:5
+   |
+LL |     '🐱 loop {
+   |     ^^^
+
+error: lifetimes or labels cannot contain emojis
+  --> $DIR/issue-108019-bad-emoji-recovery.rs:19:5
+   |
+LL |     'a🐱 loop {
+   |     ^^^^
+
+error: lifetimes or labels cannot start with a number
+  --> $DIR/issue-108019-bad-emoji-recovery.rs:27:5
+   |
+LL |     '1🐱 loop {
+   |     ^^^^
+
+error: lifetimes or labels cannot contain emojis
+  --> $DIR/issue-108019-bad-emoji-recovery.rs:34:6
+   |
+LL | fn x<'🐱>() -> &'🐱 () {
+   |      ^^^
+
+error: lifetimes or labels cannot contain emojis
+  --> $DIR/issue-108019-bad-emoji-recovery.rs:34:16
+   |
+LL | fn x<'🐱>() -> &'🐱 () {
+   |                ^^^
+
+error: lifetimes or labels cannot contain emojis
+  --> $DIR/issue-108019-bad-emoji-recovery.rs:41:5
+   |
+LL |     'a🐱: loop {}
+   |     ^^^^
+
+error: aborting due to 9 previous errors
+