diff --git a/src/librustc_ast/token.rs b/src/librustc_ast/token.rs index 173ea5e48d682..2b348617297e0 100644 --- a/src/librustc_ast/token.rs +++ b/src/librustc_ast/token.rs @@ -760,6 +760,67 @@ pub enum Nonterminal { #[cfg(target_arch = "x86_64")] rustc_data_structures::static_assert_size!(Nonterminal, 40); +#[derive(Debug, Copy, Clone, PartialEq, RustcEncodable, RustcDecodable)] +pub enum NonterminalKind { + Item, + Block, + Stmt, + Pat, + Expr, + Ty, + Ident, + Lifetime, + Literal, + Meta, + Path, + Vis, + TT, +} + +impl NonterminalKind { + pub fn from_symbol(symbol: Symbol) -> Option { + Some(match symbol { + sym::item => NonterminalKind::Item, + sym::block => NonterminalKind::Block, + sym::stmt => NonterminalKind::Stmt, + sym::pat => NonterminalKind::Pat, + sym::expr => NonterminalKind::Expr, + sym::ty => NonterminalKind::Ty, + sym::ident => NonterminalKind::Ident, + sym::lifetime => NonterminalKind::Lifetime, + sym::literal => NonterminalKind::Literal, + sym::meta => NonterminalKind::Meta, + sym::path => NonterminalKind::Path, + sym::vis => NonterminalKind::Vis, + sym::tt => NonterminalKind::TT, + _ => return None, + }) + } + fn symbol(self) -> Symbol { + match self { + NonterminalKind::Item => sym::item, + NonterminalKind::Block => sym::block, + NonterminalKind::Stmt => sym::stmt, + NonterminalKind::Pat => sym::pat, + NonterminalKind::Expr => sym::expr, + NonterminalKind::Ty => sym::ty, + NonterminalKind::Ident => sym::ident, + NonterminalKind::Lifetime => sym::lifetime, + NonterminalKind::Literal => sym::literal, + NonterminalKind::Meta => sym::meta, + NonterminalKind::Path => sym::path, + NonterminalKind::Vis => sym::vis, + NonterminalKind::TT => sym::tt, + } + } +} + +impl fmt::Display for NonterminalKind { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "{}", self.symbol()) + } +} + impl Nonterminal { fn span(&self) -> Span { match self { diff --git a/src/librustc_expand/mbe.rs b/src/librustc_expand/mbe.rs index a728261d711a7..6f2daaa81c02f 100644 --- a/src/librustc_expand/mbe.rs +++ b/src/librustc_expand/mbe.rs @@ -9,7 +9,7 @@ crate mod macro_rules; crate mod quoted; crate mod transcribe; -use rustc_ast::token::{self, Token, TokenKind}; +use rustc_ast::token::{self, NonterminalKind, Token, TokenKind}; use rustc_ast::tokenstream::DelimSpan; use rustc_span::symbol::Ident; @@ -84,7 +84,7 @@ enum TokenTree { /// e.g., `$var` MetaVar(Span, Ident), /// e.g., `$var:expr`. This is only used in the left hand side of MBE macros. - MetaVarDecl(Span, Ident /* name to bind */, Ident /* kind of nonterminal */), + MetaVarDecl(Span, Ident /* name to bind */, Option), } impl TokenTree { diff --git a/src/librustc_expand/mbe/macro_parser.rs b/src/librustc_expand/mbe/macro_parser.rs index 3c15a81c67f67..d2fe7fe10a830 100644 --- a/src/librustc_expand/mbe/macro_parser.rs +++ b/src/librustc_expand/mbe/macro_parser.rs @@ -76,15 +76,11 @@ use TokenTreeOrTokenTreeSlice::*; use crate::mbe::{self, TokenTree}; -use rustc_ast::ptr::P; use rustc_ast::token::{self, DocComment, Nonterminal, Token}; -use rustc_ast_pretty::pprust; -use rustc_parse::parser::{FollowedByType, Parser, PathStyle}; +use rustc_parse::parser::Parser; use rustc_session::parse::ParseSess; -use rustc_span::symbol::{kw, sym, Ident, MacroRulesNormalizedIdent, Symbol}; +use rustc_span::symbol::MacroRulesNormalizedIdent; -use rustc_errors::PResult; -use rustc_span::Span; use smallvec::{smallvec, SmallVec}; use rustc_data_structures::fx::FxHashMap; @@ -382,7 +378,7 @@ fn nameize>( n_rec(sess, next_m, res.by_ref(), ret_val)?; } } - TokenTree::MetaVarDecl(span, _, id) if id.name == kw::Invalid => { + TokenTree::MetaVarDecl(span, _, None) => { if sess.missing_fragment_specifiers.borrow_mut().remove(&span).is_some() { return Err((span, "missing fragment specifier".to_string())); } @@ -565,7 +561,7 @@ fn inner_parse_loop<'root, 'tt>( } // We need to match a metavar (but the identifier is invalid)... this is an error - TokenTree::MetaVarDecl(span, _, id) if id.name == kw::Invalid => { + TokenTree::MetaVarDecl(span, _, None) => { if sess.missing_fragment_specifiers.borrow_mut().remove(&span).is_some() { return Error(span, "missing fragment specifier".to_string()); } @@ -573,10 +569,10 @@ fn inner_parse_loop<'root, 'tt>( // We need to match a metavar with a valid ident... call out to the black-box // parser by adding an item to `bb_items`. - TokenTree::MetaVarDecl(_, _, id) => { + TokenTree::MetaVarDecl(_, _, Some(kind)) => { // Built-in nonterminals never start with these tokens, // so we can eliminate them from consideration. - if may_begin_with(token, id.name) { + if Parser::nonterminal_may_begin_with(kind, token) { bb_items.push(item); } } @@ -706,7 +702,7 @@ pub(super) fn parse_tt(parser: &mut Cow<'_, Parser<'_>>, ms: &[TokenTree]) -> Na let nts = bb_items .iter() .map(|item| match item.top_elts.get_tt(item.idx) { - TokenTree::MetaVarDecl(_, bind, name) => format!("{} ('{}')", name, bind), + TokenTree::MetaVarDecl(_, bind, Some(kind)) => format!("{} ('{}')", kind, bind), _ => panic!(), }) .collect::>() @@ -736,10 +732,17 @@ pub(super) fn parse_tt(parser: &mut Cow<'_, Parser<'_>>, ms: &[TokenTree]) -> Na assert_eq!(bb_items.len(), 1); let mut item = bb_items.pop().unwrap(); - if let TokenTree::MetaVarDecl(span, _, ident) = item.top_elts.get_tt(item.idx) { + if let TokenTree::MetaVarDecl(span, _, Some(kind)) = item.top_elts.get_tt(item.idx) { let match_cur = item.match_cur; - let nt = match parse_nt(parser.to_mut(), span, ident.name) { - Err(()) => return ErrorReported, + let nt = match parser.to_mut().parse_nonterminal(kind) { + Err(mut err) => { + err.span_label( + span, + format!("while parsing argument for this `{}` macro fragment", kind), + ) + .emit(); + return ErrorReported; + } Ok(nt) => nt, }; item.push_match(match_cur, MatchedNonterminal(Lrc::new(nt))); @@ -754,178 +757,3 @@ pub(super) fn parse_tt(parser: &mut Cow<'_, Parser<'_>>, ms: &[TokenTree]) -> Na assert!(!cur_items.is_empty()); } } - -/// The token is an identifier, but not `_`. -/// We prohibit passing `_` to macros expecting `ident` for now. -fn get_macro_ident(token: &Token) -> Option<(Ident, bool)> { - token.ident().filter(|(ident, _)| ident.name != kw::Underscore) -} - -/// Checks whether a non-terminal may begin with a particular token. -/// -/// Returning `false` is a *stability guarantee* that such a matcher will *never* begin with that -/// token. Be conservative (return true) if not sure. -fn may_begin_with(token: &Token, name: Symbol) -> bool { - /// Checks whether the non-terminal may contain a single (non-keyword) identifier. - fn may_be_ident(nt: &token::Nonterminal) -> bool { - match *nt { - token::NtItem(_) | token::NtBlock(_) | token::NtVis(_) | token::NtLifetime(_) => false, - _ => true, - } - } - - match name { - sym::expr => { - token.can_begin_expr() - // This exception is here for backwards compatibility. - && !token.is_keyword(kw::Let) - } - sym::ty => token.can_begin_type(), - sym::ident => get_macro_ident(token).is_some(), - sym::literal => token.can_begin_literal_maybe_minus(), - sym::vis => match token.kind { - // The follow-set of :vis + "priv" keyword + interpolated - token::Comma | token::Ident(..) | token::Interpolated(..) => true, - _ => token.can_begin_type(), - }, - sym::block => match token.kind { - token::OpenDelim(token::Brace) => true, - token::Interpolated(ref nt) => match **nt { - token::NtItem(_) - | token::NtPat(_) - | token::NtTy(_) - | token::NtIdent(..) - | token::NtMeta(_) - | token::NtPath(_) - | token::NtVis(_) => false, // none of these may start with '{'. - _ => true, - }, - _ => false, - }, - sym::path | sym::meta => match token.kind { - token::ModSep | token::Ident(..) => true, - token::Interpolated(ref nt) => match **nt { - token::NtPath(_) | token::NtMeta(_) => true, - _ => may_be_ident(&nt), - }, - _ => false, - }, - sym::pat => match token.kind { - token::Ident(..) | // box, ref, mut, and other identifiers (can stricten) - token::OpenDelim(token::Paren) | // tuple pattern - token::OpenDelim(token::Bracket) | // slice pattern - token::BinOp(token::And) | // reference - token::BinOp(token::Minus) | // negative literal - token::AndAnd | // double reference - token::Literal(..) | // literal - token::DotDot | // range pattern (future compat) - token::DotDotDot | // range pattern (future compat) - token::ModSep | // path - token::Lt | // path (UFCS constant) - token::BinOp(token::Shl) => true, // path (double UFCS) - token::Interpolated(ref nt) => may_be_ident(nt), - _ => false, - }, - sym::lifetime => match token.kind { - token::Lifetime(_) => true, - token::Interpolated(ref nt) => match **nt { - token::NtLifetime(_) | token::NtTT(_) => true, - _ => false, - }, - _ => false, - }, - _ => match token.kind { - token::CloseDelim(_) => false, - _ => true, - }, - } -} - -/// A call to the "black-box" parser to parse some Rust non-terminal. -/// -/// # Parameters -/// -/// - `p`: the "black-box" parser to use -/// - `sp`: the `Span` we want to parse -/// - `name`: the name of the metavar _matcher_ we want to match (e.g., `tt`, `ident`, `block`, -/// etc...) -/// -/// # Returns -/// -/// The parsed non-terminal. -fn parse_nt(p: &mut Parser<'_>, sp: Span, name: Symbol) -> Result { - // FIXME(Centril): Consider moving this to `parser.rs` to make - // the visibilities of the methods used below `pub(super)` at most. - if name == sym::tt { - return Ok(token::NtTT(p.parse_token_tree())); - } - parse_nt_inner(p, sp, name).map_err(|mut err| { - err.span_label(sp, format!("while parsing argument for this `{}` macro fragment", name)) - .emit() - }) -} - -fn parse_nt_inner<'a>(p: &mut Parser<'a>, sp: Span, name: Symbol) -> PResult<'a, Nonterminal> { - // Any `Nonterminal` which stores its tokens (currently `NtItem` and `NtExpr`) - // needs to have them force-captured here. - // A `macro_rules!` invocation may pass a captured item/expr to a proc-macro, - // which requires having captured tokens available. Since we cannot determine - // in advance whether or not a proc-macro will be (transitively) invoked, - // we always capture tokens for any `Nonterminal` which needs them. - Ok(match name { - sym::item => match p.collect_tokens(|this| this.parse_item())? { - (Some(mut item), tokens) => { - // If we captured tokens during parsing (due to outer attributes), - // use those. - if item.tokens.is_none() { - item.tokens = Some(tokens); - } - token::NtItem(item) - } - (None, _) => return Err(p.struct_span_err(p.token.span, "expected an item keyword")), - }, - sym::block => token::NtBlock(p.parse_block()?), - sym::stmt => match p.parse_stmt()? { - Some(s) => token::NtStmt(s), - None => return Err(p.struct_span_err(p.token.span, "expected a statement")), - }, - sym::pat => token::NtPat(p.parse_pat(None)?), - sym::expr => { - let (mut expr, tokens) = p.collect_tokens(|this| this.parse_expr())?; - // If we captured tokens during parsing (due to outer attributes), - // use those. - if expr.tokens.is_none() { - expr.tokens = Some(tokens); - } - token::NtExpr(expr) - } - sym::literal => token::NtLiteral(p.parse_literal_maybe_minus()?), - sym::ty => token::NtTy(p.parse_ty()?), - // this could be handled like a token, since it is one - sym::ident => { - if let Some((ident, is_raw)) = get_macro_ident(&p.token) { - p.bump(); - token::NtIdent(ident, is_raw) - } else { - let token_str = pprust::token_to_string(&p.token); - let msg = &format!("expected ident, found {}", &token_str); - return Err(p.struct_span_err(p.token.span, msg)); - } - } - sym::path => token::NtPath(p.parse_path(PathStyle::Type)?), - sym::meta => token::NtMeta(P(p.parse_attr_item()?)), - sym::vis => token::NtVis(p.parse_visibility(FollowedByType::Yes)?), - sym::lifetime => { - if p.check_lifetime() { - token::NtLifetime(p.expect_lifetime().ident) - } else { - let token_str = pprust::token_to_string(&p.token); - let msg = &format!("expected a lifetime, found `{}`", &token_str); - return Err(p.struct_span_err(p.token.span, msg)); - } - } - // this is not supposed to happen, since it has been checked - // when compiling the macro. - _ => p.span_bug(sp, "invalid fragment specifier"), - }) -} diff --git a/src/librustc_expand/mbe/macro_rules.rs b/src/librustc_expand/mbe/macro_rules.rs index 36b323df69797..74d4023b41075 100644 --- a/src/librustc_expand/mbe/macro_rules.rs +++ b/src/librustc_expand/mbe/macro_rules.rs @@ -9,7 +9,7 @@ use crate::mbe::macro_parser::{MatchedNonterminal, MatchedSeq}; use crate::mbe::transcribe::transcribe; use rustc_ast::ast; -use rustc_ast::token::{self, NtTT, Token, TokenKind::*}; +use rustc_ast::token::{self, NonterminalKind, NtTT, Token, TokenKind::*}; use rustc_ast::tokenstream::{DelimSpan, TokenStream}; use rustc_ast_pretty::pprust; use rustc_attr::{self as attr, TransparencyError}; @@ -21,7 +21,7 @@ use rustc_parse::parser::Parser; use rustc_session::parse::ParseSess; use rustc_span::edition::Edition; use rustc_span::hygiene::Transparency; -use rustc_span::symbol::{kw, sym, Ident, MacroRulesNormalizedIdent, Symbol}; +use rustc_span::symbol::{kw, sym, Ident, MacroRulesNormalizedIdent}; use rustc_span::Span; use log::debug; @@ -29,10 +29,6 @@ use std::borrow::Cow; use std::collections::hash_map::Entry; use std::{mem, slice}; -const VALID_FRAGMENT_NAMES_MSG: &str = "valid fragment specifiers are \ - `ident`, `block`, `stmt`, `expr`, `pat`, `ty`, `lifetime`, \ - `literal`, `path`, `meta`, `tt`, `item` and `vis`"; - crate struct ParserAnyMacro<'a> { parser: Parser<'a>, @@ -403,7 +399,7 @@ pub fn compile_declarative_macro( let diag = &sess.span_diagnostic; let lhs_nm = Ident::new(sym::lhs, def.span); let rhs_nm = Ident::new(sym::rhs, def.span); - let tt_spec = Ident::new(sym::tt, def.span); + let tt_spec = Some(NonterminalKind::TT); // Parse the macro_rules! invocation let (macro_rules, body) = match &def.kind { @@ -571,7 +567,7 @@ fn check_lhs_no_empty_seq(sess: &ParseSess, tts: &[mbe::TokenTree]) -> bool { TokenTree::Sequence(span, ref seq) => { if seq.separator.is_none() && seq.tts.iter().all(|seq_tt| match *seq_tt { - TokenTree::MetaVarDecl(_, _, id) => id.name == sym::vis, + TokenTree::MetaVarDecl(_, _, Some(NonterminalKind::Vis)) => true, TokenTree::Sequence(_, ref sub_seq) => { sub_seq.kleene.op == mbe::KleeneOp::ZeroOrMore || sub_seq.kleene.op == mbe::KleeneOp::ZeroOrOne @@ -890,21 +886,7 @@ fn check_matcher_core( // of NT tokens that might end the sequence `... token`. match *token { TokenTree::Token(..) | TokenTree::MetaVar(..) | TokenTree::MetaVarDecl(..) => { - let can_be_followed_by_any; - if let Err(bad_frag) = has_legal_fragment_specifier(sess, features, attrs, token) { - let msg = format!("invalid fragment specifier `{}`", bad_frag); - sess.span_diagnostic - .struct_span_err(token.span(), &msg) - .help(VALID_FRAGMENT_NAMES_MSG) - .emit(); - // (This eliminates false positives and duplicates - // from error messages.) - can_be_followed_by_any = true; - } else { - can_be_followed_by_any = token_can_be_followed_by_any(token); - } - - if can_be_followed_by_any { + if token_can_be_followed_by_any(token) { // don't need to track tokens that work with any, last.replace_with_irrelevant(); // ... and don't need to check tokens that can be @@ -967,19 +949,10 @@ fn check_matcher_core( // Now `last` holds the complete set of NT tokens that could // end the sequence before SUFFIX. Check that every one works with `suffix`. - 'each_last: for token in &last.tokens { - if let TokenTree::MetaVarDecl(_, name, frag_spec) = *token { + for token in &last.tokens { + if let TokenTree::MetaVarDecl(_, name, Some(kind)) = *token { for next_token in &suffix_first.tokens { - match is_in_follow(next_token, frag_spec.name) { - IsInFollow::Invalid(msg, help) => { - sess.span_diagnostic - .struct_span_err(next_token.span(), &msg) - .help(help) - .emit(); - // don't bother reporting every source of - // conflict for a particular element of `last`. - continue 'each_last; - } + match is_in_follow(next_token, kind) { IsInFollow::Yes => {} IsInFollow::No(possible) => { let may_be = if last.tokens.len() == 1 && suffix_first.tokens.len() == 1 @@ -996,22 +969,19 @@ fn check_matcher_core( "`${name}:{frag}` {may_be} followed by `{next}`, which \ is not allowed for `{frag}` fragments", name = name, - frag = frag_spec, + frag = kind, next = quoted_tt_to_string(next_token), may_be = may_be ), ); - err.span_label( - sp, - format!("not allowed after `{}` fragments", frag_spec), - ); + err.span_label(sp, format!("not allowed after `{}` fragments", kind)); let msg = "allowed there are: "; match possible { &[] => {} &[t] => { err.note(&format!( "only {} is allowed after `{}` fragments", - t, frag_spec, + t, kind, )); } ts => { @@ -1038,8 +1008,8 @@ fn check_matcher_core( } fn token_can_be_followed_by_any(tok: &mbe::TokenTree) -> bool { - if let mbe::TokenTree::MetaVarDecl(_, _, frag_spec) = *tok { - frag_can_be_followed_by_any(frag_spec.name) + if let mbe::TokenTree::MetaVarDecl(_, _, Some(kind)) = *tok { + frag_can_be_followed_by_any(kind) } else { // (Non NT's can always be followed by anything in matchers.) true @@ -1054,26 +1024,23 @@ fn token_can_be_followed_by_any(tok: &mbe::TokenTree) -> bool { /// specifier which consumes at most one token tree can be followed by /// a fragment specifier (indeed, these fragments can be followed by /// ANYTHING without fear of future compatibility hazards). -fn frag_can_be_followed_by_any(frag: Symbol) -> bool { - match frag { - sym::item | // always terminated by `}` or `;` - sym::block | // exactly one token tree - sym::ident | // exactly one token tree - sym::literal | // exactly one token tree - sym::meta | // exactly one token tree - sym::lifetime | // exactly one token tree - sym::tt => // exactly one token tree - true, - - _ => - false, +fn frag_can_be_followed_by_any(kind: NonterminalKind) -> bool { + match kind { + NonterminalKind::Item // always terminated by `}` or `;` + | NonterminalKind::Block // exactly one token tree + | NonterminalKind::Ident // exactly one token tree + | NonterminalKind::Literal // exactly one token tree + | NonterminalKind::Meta // exactly one token tree + | NonterminalKind::Lifetime // exactly one token tree + | NonterminalKind::TT => true, // exactly one token tree + + _ => false, } } enum IsInFollow { Yes, No(&'static [&'static str]), - Invalid(String, &'static str), } /// Returns `true` if `frag` can legally be followed by the token `tok`. For @@ -1084,7 +1051,7 @@ enum IsInFollow { /// break macros that were relying on that binary operator as a /// separator. // when changing this do not forget to update doc/book/macros.md! -fn is_in_follow(tok: &mbe::TokenTree, frag: Symbol) -> IsInFollow { +fn is_in_follow(tok: &mbe::TokenTree, kind: NonterminalKind) -> IsInFollow { use mbe::TokenTree; if let TokenTree::Token(Token { kind: token::CloseDelim(_), .. }) = *tok { @@ -1092,18 +1059,18 @@ fn is_in_follow(tok: &mbe::TokenTree, frag: Symbol) -> IsInFollow { // iow, we always require that `(` and `)` match, etc. IsInFollow::Yes } else { - match frag { - sym::item => { + match kind { + NonterminalKind::Item => { // since items *must* be followed by either a `;` or a `}`, we can // accept anything after them IsInFollow::Yes } - sym::block => { + NonterminalKind::Block => { // anything can follow block, the braces provide an easy boundary to // maintain IsInFollow::Yes } - sym::stmt | sym::expr => { + NonterminalKind::Stmt | NonterminalKind::Expr => { const TOKENS: &[&str] = &["`=>`", "`,`", "`;`"]; match tok { TokenTree::Token(token) => match token.kind { @@ -1113,7 +1080,7 @@ fn is_in_follow(tok: &mbe::TokenTree, frag: Symbol) -> IsInFollow { _ => IsInFollow::No(TOKENS), } } - sym::pat => { + NonterminalKind::Pat => { const TOKENS: &[&str] = &["`=>`", "`,`", "`=`", "`|`", "`if`", "`in`"]; match tok { TokenTree::Token(token) => match token.kind { @@ -1124,7 +1091,7 @@ fn is_in_follow(tok: &mbe::TokenTree, frag: Symbol) -> IsInFollow { _ => IsInFollow::No(TOKENS), } } - sym::path | sym::ty => { + NonterminalKind::Path | NonterminalKind::Ty => { const TOKENS: &[&str] = &[ "`{`", "`[`", "`=>`", "`,`", "`>`", "`=`", "`:`", "`;`", "`|`", "`as`", "`where`", @@ -1146,26 +1113,24 @@ fn is_in_follow(tok: &mbe::TokenTree, frag: Symbol) -> IsInFollow { } _ => IsInFollow::No(TOKENS), }, - TokenTree::MetaVarDecl(_, _, frag) if frag.name == sym::block => { - IsInFollow::Yes - } + TokenTree::MetaVarDecl(_, _, Some(NonterminalKind::Block)) => IsInFollow::Yes, _ => IsInFollow::No(TOKENS), } } - sym::ident | sym::lifetime => { + NonterminalKind::Ident | NonterminalKind::Lifetime => { // being a single token, idents and lifetimes are harmless IsInFollow::Yes } - sym::literal => { + NonterminalKind::Literal => { // literals may be of a single token, or two tokens (negative numbers) IsInFollow::Yes } - sym::meta | sym::tt => { + NonterminalKind::Meta | NonterminalKind::TT => { // being either a single token or a delimited sequence, tt is // harmless IsInFollow::Yes } - sym::vis => { + NonterminalKind::Vis => { // Explicitly disallow `priv`, on the off chance it comes back. const TOKENS: &[&str] = &["`,`", "an ident", "a type"]; match tok { @@ -1180,78 +1145,24 @@ fn is_in_follow(tok: &mbe::TokenTree, frag: Symbol) -> IsInFollow { } } }, - TokenTree::MetaVarDecl(_, _, frag) - if frag.name == sym::ident - || frag.name == sym::ty - || frag.name == sym::path => - { - IsInFollow::Yes - } + TokenTree::MetaVarDecl( + _, + _, + Some(NonterminalKind::Ident | NonterminalKind::Ty | NonterminalKind::Path), + ) => IsInFollow::Yes, _ => IsInFollow::No(TOKENS), } } - kw::Invalid => IsInFollow::Yes, - _ => IsInFollow::Invalid( - format!("invalid fragment specifier `{}`", frag), - VALID_FRAGMENT_NAMES_MSG, - ), } } } -fn has_legal_fragment_specifier( - sess: &ParseSess, - features: &Features, - attrs: &[ast::Attribute], - tok: &mbe::TokenTree, -) -> Result<(), String> { - debug!("has_legal_fragment_specifier({:?})", tok); - if let mbe::TokenTree::MetaVarDecl(_, _, ref frag_spec) = *tok { - let frag_span = tok.span(); - if !is_legal_fragment_specifier(sess, features, attrs, frag_spec.name, frag_span) { - return Err(frag_spec.to_string()); - } - } - Ok(()) -} - -fn is_legal_fragment_specifier( - _sess: &ParseSess, - _features: &Features, - _attrs: &[ast::Attribute], - frag_name: Symbol, - _frag_span: Span, -) -> bool { - /* - * If new fragment specifiers are invented in nightly, `_sess`, - * `_features`, `_attrs`, and `_frag_span` will be useful here - * for checking against feature gates. See past versions of - * this function. - */ - match frag_name { - sym::item - | sym::block - | sym::stmt - | sym::expr - | sym::pat - | sym::lifetime - | sym::path - | sym::ty - | sym::ident - | sym::meta - | sym::tt - | sym::vis - | sym::literal - | kw::Invalid => true, - _ => false, - } -} - fn quoted_tt_to_string(tt: &mbe::TokenTree) -> String { match *tt { mbe::TokenTree::Token(ref token) => pprust::token_to_string(&token), mbe::TokenTree::MetaVar(_, name) => format!("${}", name), - mbe::TokenTree::MetaVarDecl(_, name, kind) => format!("${}:{}", name, kind), + mbe::TokenTree::MetaVarDecl(_, name, Some(kind)) => format!("${}:{}", name, kind), + mbe::TokenTree::MetaVarDecl(_, name, None) => format!("${}:", name), _ => panic!( "unexpected mbe::TokenTree::{{Sequence or Delimited}} \ in follow set checker" diff --git a/src/librustc_expand/mbe/quoted.rs b/src/librustc_expand/mbe/quoted.rs index 09306f26ee0ad..774cc84afdeb1 100644 --- a/src/librustc_expand/mbe/quoted.rs +++ b/src/librustc_expand/mbe/quoted.rs @@ -12,6 +12,10 @@ use rustc_span::Span; use rustc_data_structures::sync::Lrc; +const VALID_FRAGMENT_NAMES_MSG: &str = "valid fragment specifiers are \ + `ident`, `block`, `stmt`, `expr`, `pat`, `ty`, `lifetime`, \ + `literal`, `path`, `meta`, `tt`, `item` and `vis`"; + /// Takes a `tokenstream::TokenStream` and returns a `Vec`. Specifically, this /// takes a generic `TokenStream`, such as is used in the rest of the compiler, and returns a /// collection of `TokenTree` for use in parsing a macro. @@ -55,9 +59,21 @@ pub(super) fn parse( Some(tokenstream::TokenTree::Token(Token { kind: token::Colon, span })) => { match trees.next() { Some(tokenstream::TokenTree::Token(token)) => match token.ident() { - Some((kind, _)) => { + Some((frag, _)) => { let span = token.span.with_lo(start_sp.lo()); - result.push(TokenTree::MetaVarDecl(span, ident, kind)); + let kind = token::NonterminalKind::from_symbol(frag.name) + .unwrap_or_else(|| { + let msg = format!( + "invalid fragment specifier `{}`", + frag.name + ); + sess.span_diagnostic + .struct_span_err(span, &msg) + .help(VALID_FRAGMENT_NAMES_MSG) + .emit(); + token::NonterminalKind::Ident + }); + result.push(TokenTree::MetaVarDecl(span, ident, Some(kind))); continue; } _ => token.span, @@ -71,7 +87,7 @@ pub(super) fn parse( // Macros loaded from other crates have dummy node ids. sess.missing_fragment_specifiers.borrow_mut().insert(span, node_id); } - result.push(TokenTree::MetaVarDecl(span, ident, Ident::invalid())); + result.push(TokenTree::MetaVarDecl(span, ident, None)); } // Not a metavar or no matchers allowed, so just return the tree diff --git a/src/librustc_parse/parser/expr.rs b/src/librustc_parse/parser/expr.rs index d06b172bc1484..3aec300d86d4f 100644 --- a/src/librustc_parse/parser/expr.rs +++ b/src/librustc_parse/parser/expr.rs @@ -1450,7 +1450,7 @@ impl<'a> Parser<'a> { /// Matches `'-' lit | lit` (cf. `ast_validation::AstValidator::check_expr_within_pat`). /// Keep this in sync with `Token::can_begin_literal_maybe_minus`. - pub fn parse_literal_maybe_minus(&mut self) -> PResult<'a, P> { + pub(super) fn parse_literal_maybe_minus(&mut self) -> PResult<'a, P> { maybe_whole_expr!(self); let lo = self.token.span; diff --git a/src/librustc_parse/parser/mod.rs b/src/librustc_parse/parser/mod.rs index 72866468b6560..2509a9792215d 100644 --- a/src/librustc_parse/parser/mod.rs +++ b/src/librustc_parse/parser/mod.rs @@ -1,16 +1,17 @@ pub mod attr; +mod diagnostics; mod expr; +mod generics; mod item; +mod nonterminal; mod pat; mod path; -mod ty; -pub use path::PathStyle; -mod diagnostics; -mod generics; mod stmt; -use diagnostics::Error; +mod ty; use crate::lexer::UnmatchedBrace; +use diagnostics::Error; +pub use path::PathStyle; use log::debug; use rustc_ast::ast::DUMMY_NODE_ID; @@ -958,7 +959,7 @@ impl<'a> Parser<'a> { } /// Parses a single token tree from the input. - pub fn parse_token_tree(&mut self) -> TokenTree { + pub(crate) fn parse_token_tree(&mut self) -> TokenTree { match self.token.kind { token::OpenDelim(..) => { let frame = mem::replace( @@ -1017,7 +1018,7 @@ impl<'a> Parser<'a> { /// If the following element can't be a tuple (i.e., it's a function definition), then /// it's not a tuple struct field), and the contents within the parentheses isn't valid, /// so emit a proper diagnostic. - pub fn parse_visibility(&mut self, fbt: FollowedByType) -> PResult<'a, Visibility> { + pub(crate) fn parse_visibility(&mut self, fbt: FollowedByType) -> PResult<'a, Visibility> { maybe_whole!(self, NtVis, |x| x); self.expected_tokens.push(TokenType::Keyword(kw::Crate)); diff --git a/src/librustc_parse/parser/nonterminal.rs b/src/librustc_parse/parser/nonterminal.rs new file mode 100644 index 0000000000000..12139771bbfda --- /dev/null +++ b/src/librustc_parse/parser/nonterminal.rs @@ -0,0 +1,163 @@ +use rustc_ast::ptr::P; +use rustc_ast::token::{self, Nonterminal, NonterminalKind, Token}; +use rustc_ast_pretty::pprust; +use rustc_errors::PResult; +use rustc_span::symbol::{kw, Ident}; + +use crate::parser::{FollowedByType, Parser, PathStyle}; + +impl<'a> Parser<'a> { + /// Checks whether a non-terminal may begin with a particular token. + /// + /// Returning `false` is a *stability guarantee* that such a matcher will *never* begin with that + /// token. Be conservative (return true) if not sure. + pub fn nonterminal_may_begin_with(kind: NonterminalKind, token: &Token) -> bool { + /// Checks whether the non-terminal may contain a single (non-keyword) identifier. + fn may_be_ident(nt: &token::Nonterminal) -> bool { + match *nt { + token::NtItem(_) | token::NtBlock(_) | token::NtVis(_) | token::NtLifetime(_) => { + false + } + _ => true, + } + } + + match kind { + NonterminalKind::Expr => { + token.can_begin_expr() + // This exception is here for backwards compatibility. + && !token.is_keyword(kw::Let) + } + NonterminalKind::Ty => token.can_begin_type(), + NonterminalKind::Ident => get_macro_ident(token).is_some(), + NonterminalKind::Literal => token.can_begin_literal_maybe_minus(), + NonterminalKind::Vis => match token.kind { + // The follow-set of :vis + "priv" keyword + interpolated + token::Comma | token::Ident(..) | token::Interpolated(..) => true, + _ => token.can_begin_type(), + }, + NonterminalKind::Block => match token.kind { + token::OpenDelim(token::Brace) => true, + token::Interpolated(ref nt) => match **nt { + token::NtItem(_) + | token::NtPat(_) + | token::NtTy(_) + | token::NtIdent(..) + | token::NtMeta(_) + | token::NtPath(_) + | token::NtVis(_) => false, // none of these may start with '{'. + _ => true, + }, + _ => false, + }, + NonterminalKind::Path | NonterminalKind::Meta => match token.kind { + token::ModSep | token::Ident(..) => true, + token::Interpolated(ref nt) => match **nt { + token::NtPath(_) | token::NtMeta(_) => true, + _ => may_be_ident(&nt), + }, + _ => false, + }, + NonterminalKind::Pat => match token.kind { + token::Ident(..) | // box, ref, mut, and other identifiers (can stricten) + token::OpenDelim(token::Paren) | // tuple pattern + token::OpenDelim(token::Bracket) | // slice pattern + token::BinOp(token::And) | // reference + token::BinOp(token::Minus) | // negative literal + token::AndAnd | // double reference + token::Literal(..) | // literal + token::DotDot | // range pattern (future compat) + token::DotDotDot | // range pattern (future compat) + token::ModSep | // path + token::Lt | // path (UFCS constant) + token::BinOp(token::Shl) => true, // path (double UFCS) + token::Interpolated(ref nt) => may_be_ident(nt), + _ => false, + }, + NonterminalKind::Lifetime => match token.kind { + token::Lifetime(_) => true, + token::Interpolated(ref nt) => match **nt { + token::NtLifetime(_) | token::NtTT(_) => true, + _ => false, + }, + _ => false, + }, + NonterminalKind::TT | NonterminalKind::Item | NonterminalKind::Stmt => match token.kind + { + token::CloseDelim(_) => false, + _ => true, + }, + } + } + + pub fn parse_nonterminal(&mut self, kind: NonterminalKind) -> PResult<'a, Nonterminal> { + // Any `Nonterminal` which stores its tokens (currently `NtItem` and `NtExpr`) + // needs to have them force-captured here. + // A `macro_rules!` invocation may pass a captured item/expr to a proc-macro, + // which requires having captured tokens available. Since we cannot determine + // in advance whether or not a proc-macro will be (transitively) invoked, + // we always capture tokens for any `Nonterminal` which needs them. + Ok(match kind { + NonterminalKind::Item => match self.collect_tokens(|this| this.parse_item())? { + (Some(mut item), tokens) => { + // If we captured tokens during parsing (due to outer attributes), + // use those. + if item.tokens.is_none() { + item.tokens = Some(tokens); + } + token::NtItem(item) + } + (None, _) => { + return Err(self.struct_span_err(self.token.span, "expected an item keyword")); + } + }, + NonterminalKind::Block => token::NtBlock(self.parse_block()?), + NonterminalKind::Stmt => match self.parse_stmt()? { + Some(s) => token::NtStmt(s), + None => return Err(self.struct_span_err(self.token.span, "expected a statement")), + }, + NonterminalKind::Pat => token::NtPat(self.parse_pat(None)?), + NonterminalKind::Expr => { + let (mut expr, tokens) = self.collect_tokens(|this| this.parse_expr())?; + // If we captured tokens during parsing (due to outer attributes), + // use those. + if expr.tokens.is_none() { + expr.tokens = Some(tokens); + } + token::NtExpr(expr) + } + NonterminalKind::Literal => token::NtLiteral(self.parse_literal_maybe_minus()?), + NonterminalKind::Ty => token::NtTy(self.parse_ty()?), + // this could be handled like a token, since it is one + NonterminalKind::Ident => { + if let Some((ident, is_raw)) = get_macro_ident(&self.token) { + self.bump(); + token::NtIdent(ident, is_raw) + } else { + let token_str = pprust::token_to_string(&self.token); + let msg = &format!("expected ident, found {}", &token_str); + return Err(self.struct_span_err(self.token.span, msg)); + } + } + NonterminalKind::Path => token::NtPath(self.parse_path(PathStyle::Type)?), + NonterminalKind::Meta => token::NtMeta(P(self.parse_attr_item()?)), + NonterminalKind::TT => token::NtTT(self.parse_token_tree()), + NonterminalKind::Vis => token::NtVis(self.parse_visibility(FollowedByType::Yes)?), + NonterminalKind::Lifetime => { + if self.check_lifetime() { + token::NtLifetime(self.expect_lifetime().ident) + } else { + let token_str = pprust::token_to_string(&self.token); + let msg = &format!("expected a lifetime, found `{}`", &token_str); + return Err(self.struct_span_err(self.token.span, msg)); + } + } + }) + } +} + +/// The token is an identifier, but not `_`. +/// We prohibit passing `_` to macros expecting `ident` for now. +fn get_macro_ident(token: &Token) -> Option<(Ident, bool)> { + token.ident().filter(|(ident, _)| ident.name != kw::Underscore) +} diff --git a/src/librustc_parse/parser/path.rs b/src/librustc_parse/parser/path.rs index 3dcefd362574b..d4e44c54b1274 100644 --- a/src/librustc_parse/parser/path.rs +++ b/src/librustc_parse/parser/path.rs @@ -125,7 +125,7 @@ impl<'a> Parser<'a> { /// `a::b::C::` (with disambiguator) /// `Fn(Args)` (without disambiguator) /// `Fn::(Args)` (with disambiguator) - pub fn parse_path(&mut self, style: PathStyle) -> PResult<'a, Path> { + pub(super) fn parse_path(&mut self, style: PathStyle) -> PResult<'a, Path> { maybe_whole!(self, NtPath, |path| { if style == PathStyle::Mod && path.segments.iter().any(|segment| segment.args.is_some()) { diff --git a/src/librustc_parse/parser/stmt.rs b/src/librustc_parse/parser/stmt.rs index d04920de47f09..5c3a5e9987324 100644 --- a/src/librustc_parse/parser/stmt.rs +++ b/src/librustc_parse/parser/stmt.rs @@ -21,7 +21,7 @@ use std::mem; impl<'a> Parser<'a> { /// Parses a statement. This stops just before trailing semicolons on everything but items. /// e.g., a `StmtKind::Semi` parses to a `StmtKind::Expr`, leaving the trailing `;` unconsumed. - pub fn parse_stmt(&mut self) -> PResult<'a, Option> { + pub(super) fn parse_stmt(&mut self) -> PResult<'a, Option> { Ok(self.parse_stmt_without_recovery().unwrap_or_else(|mut e| { e.emit(); self.recover_stmt_(SemiColonMode::Break, BlockMode::Ignore); @@ -247,7 +247,7 @@ impl<'a> Parser<'a> { } /// Parses a block. No inner attributes are allowed. - pub fn parse_block(&mut self) -> PResult<'a, P> { + pub(super) fn parse_block(&mut self) -> PResult<'a, P> { let (attrs, block) = self.parse_inner_attrs_and_block()?; if let [.., last] = &*attrs { self.error_on_forbidden_inner_attr(last.span, DEFAULT_INNER_ATTR_FORBIDDEN); diff --git a/src/librustc_parse/parser/ty.rs b/src/librustc_parse/parser/ty.rs index a6015504a3287..cd66b917f232a 100644 --- a/src/librustc_parse/parser/ty.rs +++ b/src/librustc_parse/parser/ty.rs @@ -610,13 +610,13 @@ impl<'a> Parser<'a> { } } - pub fn check_lifetime(&mut self) -> bool { + pub(super) fn check_lifetime(&mut self) -> bool { self.expected_tokens.push(TokenType::Lifetime); self.token.is_lifetime() } /// Parses a single lifetime `'a` or panics. - pub fn expect_lifetime(&mut self) -> Lifetime { + pub(super) fn expect_lifetime(&mut self) -> Lifetime { if let Some(ident) = self.token.lifetime() { self.bump(); Lifetime { ident, id: ast::DUMMY_NODE_ID }