Skip to content

Commit

Permalink
Auto merge of #85359 - lrh2000:reserved-prefixes, r=nikomatsakis
Browse files Browse the repository at this point in the history
Reserve prefixed identifiers and literals (RFC 3101)

This PR denies any identifiers immediately followed by one of three tokens `"`, `'` or `#`, which is stricter than the requirements of RFC 3101 but may be necessary according to the discussion at [Zulip].

[Zulip]: https://rust-lang.zulipchat.com/#narrow/stream/268952-edition-2021/topic/reserved.20prefixes/near/238470099

The tracking issue #84599 says we'll add a feature gate named `reserved_prefixes`, but I don't think I can do this because it is impossible for the lexer to know whether a feature is enabled or not. I guess determining the behavior by the edition information should be enough.

Fixes #84599
  • Loading branch information
bors committed Jun 27, 2021
2 parents a4f832b + f6dd137 commit e8cb1a4
Show file tree
Hide file tree
Showing 16 changed files with 518 additions and 9 deletions.
22 changes: 17 additions & 5 deletions compiler/rustc_lexer/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,13 @@ pub enum TokenKind {
Ident,
/// "r#ident"
RawIdent,
/// An unknown prefix like `foo#`, `foo'`, `foo"`. Note that only the
/// prefix (`foo`) is included in the token, not the separator (which is
/// lexed as its own distinct token). In Rust 2021 and later, reserved
/// prefixes are reported as errors; in earlier editions, they result in a
/// (allowed by default) lint, and are treated as regular identifier
/// tokens.
UnknownPrefix,
/// "12_u8", "1.0e-40", "b"123"". See `LiteralKind` for more details.
Literal { kind: LiteralKind, suffix_start: usize },
/// "'a"
Expand Down Expand Up @@ -323,7 +330,7 @@ impl Cursor<'_> {
let kind = RawStr { n_hashes, err };
Literal { kind, suffix_start }
}
_ => self.ident(),
_ => self.ident_or_unknown_prefix(),
},

// Byte literal, byte string literal, raw byte string literal or identifier.
Expand Down Expand Up @@ -358,12 +365,12 @@ impl Cursor<'_> {
let kind = RawByteStr { n_hashes, err };
Literal { kind, suffix_start }
}
_ => self.ident(),
_ => self.ident_or_unknown_prefix(),
},

// Identifier (this should be checked after other variant that can
// start as identifier).
c if is_id_start(c) => self.ident(),
c if is_id_start(c) => self.ident_or_unknown_prefix(),

// Numeric literal.
c @ '0'..='9' => {
Expand Down Expand Up @@ -487,11 +494,16 @@ impl Cursor<'_> {
RawIdent
}

/// Eats the remainder of an identifier whose first character has already
/// been consumed. Yields `UnknownPrefix` when the identifier is immediately
/// followed by `#`, `"` or `'`, and `Ident` otherwise.
fn ident(&mut self) -> TokenKind {
fn ident_or_unknown_prefix(&mut self) -> TokenKind {
debug_assert!(is_id_start(self.prev()));
// Start is already eaten, eat the rest of the identifier.
self.eat_while(is_id_continue);
Ident
// Known prefixes must have been handled earlier. So if
// we see a prefix here, it is definitely an unknown prefix.
match self.first() {
'#' | '"' | '\'' => UnknownPrefix,
_ => Ident,
}
}

fn number(&mut self, first_digit: char) -> LiteralKind {
Expand Down
9 changes: 9 additions & 0 deletions compiler/rustc_lint/src/context.rs
Original file line number Diff line number Diff line change
Expand Up @@ -723,6 +723,15 @@ pub trait LintContext: Sized {
BuiltinLintDiagnostics::OrPatternsBackCompat(span,suggestion) => {
db.span_suggestion(span, "use pat_param to preserve semantics", suggestion, Applicability::MachineApplicable);
}
BuiltinLintDiagnostics::ReservedPrefix(span) => {
db.span_label(span, "unknown prefix");
db.span_suggestion_verbose(
span.shrink_to_hi(),
"insert whitespace here to avoid this being parsed as a prefix in Rust 2021",
" ".into(),
Applicability::MachineApplicable,
);
}
}
// Rewrap `db`, and pass control to the user.
decorate(LintDiagnosticBuilder::new(db));
Expand Down
37 changes: 37 additions & 0 deletions compiler/rustc_lint_defs/src/builtin.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2973,6 +2973,7 @@ declare_lint_pass! {
OR_PATTERNS_BACK_COMPAT,
LARGE_ASSIGNMENTS,
FUTURE_PRELUDE_COLLISION,
RESERVED_PREFIX,
]
}

Expand Down Expand Up @@ -3263,3 +3264,39 @@ declare_lint! {
reason: FutureIncompatibilityReason::EditionError(Edition::Edition2021),
};
}

declare_lint! {
/// The `reserved_prefix` lint detects identifiers that will be parsed as a
/// prefix instead in Rust 2021.
///
/// ### Example
///
/// ```rust,compile_fail
/// #![deny(reserved_prefix)]
///
/// macro_rules! m {
/// (z $x:expr) => ();
/// }
///
/// m!(z"hey");
/// ```
///
/// {{produces}}
///
/// ### Explanation
///
/// In Rust 2015 and 2018, `z"hey"` is two tokens: the identifier `z`
/// followed by the string literal `"hey"`. In Rust 2021, the `z` is
/// considered a prefix for `"hey"`.
///
/// This lint suggests adding whitespace between the `z` and `"hey"` tokens
/// to keep them separated in Rust 2021.
pub RESERVED_PREFIX,
Allow,
"identifiers that will be parsed as a prefix in Rust 2021",
@future_incompatible = FutureIncompatibleInfo {
reference: "issue #84978 <https://github.com/rust-lang/rust/issues/84978>",
reason: FutureIncompatibilityReason::EditionError(Edition::Edition2021),
};
crate_level_only
}
1 change: 1 addition & 0 deletions compiler/rustc_lint_defs/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -300,6 +300,7 @@ pub enum BuiltinLintDiagnostics {
ExternDepSpec(String, ExternDepSpec),
ProcMacroBackCompat(String),
OrPatternsBackCompat(Span, String),
ReservedPrefix(Span),
}

/// Lints that are buffered up early on in the `Session` before the
Expand Down
50 changes: 47 additions & 3 deletions compiler/rustc_parse/src/lexer/mod.rs
Original file line number Diff line number Diff line change
@@ -1,12 +1,14 @@
use rustc_ast::ast::AttrStyle;
use rustc_ast::ast::{self, AttrStyle};
use rustc_ast::token::{self, CommentKind, Token, TokenKind};
use rustc_ast::tokenstream::{Spacing, TokenStream};
use rustc_errors::{error_code, Applicability, DiagnosticBuilder, FatalError, PResult};
use rustc_lexer::unescape::{self, Mode};
use rustc_lexer::{Base, DocStyle, RawStrError};
use rustc_session::lint::builtin::RESERVED_PREFIX;
use rustc_session::lint::BuiltinLintDiagnostics;
use rustc_session::parse::ParseSess;
use rustc_span::symbol::{sym, Symbol};
use rustc_span::{BytePos, Pos, Span};
use rustc_span::{edition::Edition, BytePos, Pos, Span};

use tracing::debug;

Expand Down Expand Up @@ -166,12 +168,18 @@ impl<'a> StringReader<'a> {
self.cook_doc_comment(content_start, content, CommentKind::Block, doc_style)
}
rustc_lexer::TokenKind::Whitespace => return None,
rustc_lexer::TokenKind::Ident | rustc_lexer::TokenKind::RawIdent => {
rustc_lexer::TokenKind::Ident
| rustc_lexer::TokenKind::RawIdent
| rustc_lexer::TokenKind::UnknownPrefix => {
let is_raw_ident = token == rustc_lexer::TokenKind::RawIdent;
let is_unknown_prefix = token == rustc_lexer::TokenKind::UnknownPrefix;
let mut ident_start = start;
if is_raw_ident {
ident_start = ident_start + BytePos(2);
}
if is_unknown_prefix {
self.report_unknown_prefix(start);
}
let sym = nfc_normalize(self.str_from(ident_start));
let span = self.mk_sp(start, self.pos);
self.sess.symbol_gallery.insert(sym, span);
Expand Down Expand Up @@ -491,6 +499,42 @@ impl<'a> StringReader<'a> {
FatalError.raise()
}

/// Reports an unknown (reserved) prefix spanning `start..self.pos`.
///
/// RFC 3101 introduced the idea of reserved prefixes. As of Rust 2021, using
/// an unknown prefix is an error. In earlier editions it only results in an
/// (allowed by default) lint, and the prefix is treated as a regular
/// identifier token.
fn report_unknown_prefix(&self, start: BytePos) {
    let prefix_span = self.mk_sp(start, self.pos);
    let prefix_text = self.str_from_to(start, self.pos);
    let msg = format!("prefix `{}` is unknown", prefix_text);

    // The edition is taken from the expansion data of the prefix's span.
    let expn_data = prefix_span.ctxt().outer_expn_data();
    let is_hard_error = expn_data.edition >= Edition::Edition2021;

    if !is_hard_error {
        // Before Rust 2021, only buffer a lint to help with migration.
        self.sess.buffer_lint_with_diagnostic(
            &RESERVED_PREFIX,
            prefix_span,
            ast::CRATE_NODE_ID,
            &msg,
            BuiltinLintDiagnostics::ReservedPrefix(prefix_span),
        );
        return;
    }

    // In Rust 2021 and later, this is a hard error.
    let mut err = self.sess.span_diagnostic.struct_span_err(prefix_span, &msg);
    err.span_label(prefix_span, "unknown prefix");

    // The whitespace suggestion is only attached for root expansion data
    // (presumably: the prefix was written directly in source rather than
    // produced by a macro expansion -- confirm against `ExpnData::is_root`).
    if expn_data.is_root() {
        let insertion_point = prefix_span.shrink_to_hi();
        err.span_suggestion_verbose(
            insertion_point,
            "consider inserting whitespace here",
            " ".into(),
            Applicability::MachineApplicable,
        );
    }

    err.note("prefixed identifiers and literals are reserved since Rust 2021");
    err.emit();
}

/// Note: It was decided to not add a test case, because it would be too big.
/// <https://github.com/rust-lang/rust/pull/50296#issuecomment-392135180>
fn report_too_many_hashes(&self, start: BytePos, found: usize) -> ! {
Expand Down
2 changes: 1 addition & 1 deletion src/librustdoc/html/highlight.rs
Original file line number Diff line number Diff line change
Expand Up @@ -413,7 +413,7 @@ impl<'a> Classifier<'a> {
},
c => c,
},
TokenKind::RawIdent => Class::Ident,
TokenKind::RawIdent | TokenKind::UnknownPrefix => Class::Ident,
TokenKind::Lifetime { .. } => Class::Lifetime,
};
// Anything that didn't return above is the simple case where the
Expand Down
25 changes: 25 additions & 0 deletions src/test/ui/rust-2021/auxiliary/reserved-prefixes-macro-2018.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
// force-host
// edition:2018
// no-prefer-dynamic

#![crate_type = "proc-macro"]

extern crate proc_macro;

use proc_macro::TokenStream;
use std::str::FromStr;

/// Parses the source text `hey#123` into a `TokenStream`, counts the token
/// trees it yields, and expands to that count. The macro input is ignored.
///
/// Panics (via `unwrap`) if the text cannot be parsed.
#[proc_macro]
pub fn number_of_tokens_in_a_prefixed_integer_literal(_: TokenStream) -> TokenStream {
    let parsed = TokenStream::from_str("hey#123").unwrap();
    let token_count = parsed.into_iter().count();
    token_count.to_string().parse().unwrap()
}

/// Parses the source text `hey#'a'` into a `TokenStream`, counts the token
/// trees it yields, and expands to that count. The macro input is ignored.
///
/// Panics (via `unwrap`) if the text cannot be parsed.
#[proc_macro]
pub fn number_of_tokens_in_a_prefixed_char_literal(_: TokenStream) -> TokenStream {
    let parsed = TokenStream::from_str("hey#'a'").unwrap();
    let token_count = parsed.into_iter().count();
    token_count.to_string().parse().unwrap()
}

/// Parses the source text `hey#"abc"` into a `TokenStream`, counts the token
/// trees it yields, and expands to that count. The macro input is ignored.
///
/// Panics (via `unwrap`) if the text cannot be parsed.
#[proc_macro]
pub fn number_of_tokens_in_a_prefixed_string_literal(_: TokenStream) -> TokenStream {
    let parsed = TokenStream::from_str("hey#\"abc\"").unwrap();
    let token_count = parsed.into_iter().count();
    token_count.to_string().parse().unwrap()
}
25 changes: 25 additions & 0 deletions src/test/ui/rust-2021/auxiliary/reserved-prefixes-macro-2021.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
// force-host
// edition:2021
// no-prefer-dynamic

#![crate_type = "proc-macro"]

extern crate proc_macro;

use proc_macro::TokenStream;
use std::str::FromStr;

/// Parses the source text `hey#123` into a `TokenStream`, counts the token
/// trees it yields, and expands to that count. The macro input is ignored.
///
/// Panics (via `unwrap`) if the text cannot be parsed.
#[proc_macro]
pub fn number_of_tokens_in_a_prefixed_integer_literal(_: TokenStream) -> TokenStream {
    let parsed = TokenStream::from_str("hey#123").unwrap();
    let token_count = parsed.into_iter().count();
    token_count.to_string().parse().unwrap()
}

/// Parses the source text `hey#'a'` into a `TokenStream`, counts the token
/// trees it yields, and expands to that count. The macro input is ignored.
///
/// Panics (via `unwrap`) if the text cannot be parsed.
#[proc_macro]
pub fn number_of_tokens_in_a_prefixed_char_literal(_: TokenStream) -> TokenStream {
    let parsed = TokenStream::from_str("hey#'a'").unwrap();
    let token_count = parsed.into_iter().count();
    token_count.to_string().parse().unwrap()
}

/// Parses the source text `hey#"abc"` into a `TokenStream`, counts the token
/// trees it yields, and expands to that count. The macro input is ignored.
///
/// Panics (via `unwrap`) if the text cannot be parsed.
#[proc_macro]
pub fn number_of_tokens_in_a_prefixed_string_literal(_: TokenStream) -> TokenStream {
    let parsed = TokenStream::from_str("hey#\"abc\"").unwrap();
    let token_count = parsed.into_iter().count();
    token_count.to_string().parse().unwrap()
}
38 changes: 38 additions & 0 deletions src/test/ui/rust-2021/reserved-prefixes-migration.fixed
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
// check-pass
// run-rustfix
// compile-flags: -Z unstable-options --edition 2018

#![warn(reserved_prefix)]

macro_rules! m2 {
($a:tt $b:tt) => {};
}

macro_rules! m3 {
($a:tt $b:tt $c:tt) => {};
}

fn main() {
m2!(z "hey");
//~^ WARNING prefix `z` is unknown [reserved_prefix]
//~| WARNING hard error in Rust 2021
m2!(prefix "hey");
//~^ WARNING prefix `prefix` is unknown [reserved_prefix]
//~| WARNING hard error in Rust 2021
m3!(hey #123);
//~^ WARNING prefix `hey` is unknown [reserved_prefix]
//~| WARNING hard error in Rust 2021
m3!(hey #hey);
//~^ WARNING prefix `hey` is unknown [reserved_prefix]
//~| WARNING hard error in Rust 2021
}

macro_rules! quote {
(# name = # kind # value) => {};
}

quote! {
#name = #kind #value
//~^ WARNING prefix `kind` is unknown [reserved_prefix]
//~| WARNING hard error in Rust 2021
}
38 changes: 38 additions & 0 deletions src/test/ui/rust-2021/reserved-prefixes-migration.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
// check-pass
// run-rustfix
// compile-flags: -Z unstable-options --edition 2018

#![warn(reserved_prefix)]

macro_rules! m2 {
($a:tt $b:tt) => {};
}

macro_rules! m3 {
($a:tt $b:tt $c:tt) => {};
}

fn main() {
m2!(z"hey");
//~^ WARNING prefix `z` is unknown [reserved_prefix]
//~| WARNING hard error in Rust 2021
m2!(prefix"hey");
//~^ WARNING prefix `prefix` is unknown [reserved_prefix]
//~| WARNING hard error in Rust 2021
m3!(hey#123);
//~^ WARNING prefix `hey` is unknown [reserved_prefix]
//~| WARNING hard error in Rust 2021
m3!(hey#hey);
//~^ WARNING prefix `hey` is unknown [reserved_prefix]
//~| WARNING hard error in Rust 2021
}

macro_rules! quote {
(# name = # kind # value) => {};
}

quote! {
#name = #kind#value
//~^ WARNING prefix `kind` is unknown [reserved_prefix]
//~| WARNING hard error in Rust 2021
}
Loading

0 comments on commit e8cb1a4

Please sign in to comment.