From cbdfa1edcaab0dc86b5f9696dea790403bcb0f19 Mon Sep 17 00:00:00 2001 From: Vadim Petrochenkov Date: Thu, 6 May 2021 16:21:40 +0300 Subject: [PATCH] parser: Ensure that all nonterminals have tokens after parsing --- compiler/rustc_ast/src/ast_like.rs | 3 +- .../rustc_parse/src/parser/attr_wrapper.rs | 10 +- compiler/rustc_parse/src/parser/expr.rs | 12 +- compiler/rustc_parse/src/parser/mod.rs | 1 + .../rustc_parse/src/parser/nonterminal.rs | 17 +- compiler/rustc_parse/src/parser/stmt.rs | 23 +- .../expr-stmt-nonterminal-tokens.rs | 37 ++ .../expr-stmt-nonterminal-tokens.stdout | 540 ++++++++++++++++++ 8 files changed, 612 insertions(+), 31 deletions(-) create mode 100644 src/test/ui/proc-macro/expr-stmt-nonterminal-tokens.rs create mode 100644 src/test/ui/proc-macro/expr-stmt-nonterminal-tokens.stdout diff --git a/compiler/rustc_ast/src/ast_like.rs b/compiler/rustc_ast/src/ast_like.rs index 945a44ab66371..d586426d70ef0 100644 --- a/compiler/rustc_ast/src/ast_like.rs +++ b/compiler/rustc_ast/src/ast_like.rs @@ -82,7 +82,8 @@ impl AstLike for crate::token::Nonterminal { Nonterminal::NtMeta(attr_item) => attr_item.tokens_mut(), Nonterminal::NtPath(path) => path.tokens_mut(), Nonterminal::NtVis(vis) => vis.tokens_mut(), - _ => panic!("Called tokens_mut on {:?}", self), + Nonterminal::NtBlock(block) => block.tokens_mut(), + Nonterminal::NtIdent(..) | Nonterminal::NtLifetime(..) | Nonterminal::NtTT(..) => None, } } } diff --git a/compiler/rustc_parse/src/parser/attr_wrapper.rs b/compiler/rustc_parse/src/parser/attr_wrapper.rs index 35759a396e87c..e1d0b84f4193f 100644 --- a/compiler/rustc_parse/src/parser/attr_wrapper.rs +++ b/compiler/rustc_parse/src/parser/attr_wrapper.rs @@ -342,16 +342,10 @@ impl<'a> Parser<'a> { // If we support tokens at all if let Some(target_tokens) = ret.tokens_mut() { - if let Some(target_tokens) = target_tokens { - assert!( - !self.capture_cfg, - "Encountered existing tokens with capture_cfg set: {:?}", - target_tokens - ); - } else { + if target_tokens.is_none() { // Store se our newly captured tokens into the AST node *target_tokens = Some(tokens.clone()); - }; + } } let final_attrs = ret.attrs(); diff --git a/compiler/rustc_parse/src/parser/expr.rs b/compiler/rustc_parse/src/parser/expr.rs index 56c97b5947682..a764cf6bdb04e 100644 --- a/compiler/rustc_parse/src/parser/expr.rs +++ b/compiler/rustc_parse/src/parser/expr.rs @@ -94,17 +94,7 @@ impl<'a> Parser<'a> { /// Parses an expression, forcing tokens to be collected pub fn parse_expr_force_collect(&mut self) -> PResult<'a, P> { - // If we have outer attributes, then the call to `collect_tokens_trailing_token` - // will be made for us. - if matches!(self.token.kind, TokenKind::Pound | TokenKind::DocComment(..)) { - self.parse_expr() - } else { - // If we don't have outer attributes, then we need to ensure - // that collection happens by using `collect_tokens_no_attrs`. - // Expression don't support custom inner attributes, so `parse_expr` - // will never try to collect tokens if we don't have outer attributes. - self.collect_tokens_no_attrs(|this| this.parse_expr()) - } + self.collect_tokens_no_attrs(|this| this.parse_expr()) } pub fn parse_anon_const_expr(&mut self) -> PResult<'a, AnonConst> { diff --git a/compiler/rustc_parse/src/parser/mod.rs b/compiler/rustc_parse/src/parser/mod.rs index 4c2bc6ebf3143..cd9f84db5e559 100644 --- a/compiler/rustc_parse/src/parser/mod.rs +++ b/compiler/rustc_parse/src/parser/mod.rs @@ -63,6 +63,7 @@ enum BlockMode { /// Whether or not we should force collection of tokens for an AST node, /// regardless of whether or not it has attributes +#[derive(Clone, Copy, PartialEq)] pub enum ForceCollect { Yes, No, diff --git a/compiler/rustc_parse/src/parser/nonterminal.rs b/compiler/rustc_parse/src/parser/nonterminal.rs index 0c43e304f1ee2..30a6b61407f69 100644 --- a/compiler/rustc_parse/src/parser/nonterminal.rs +++ b/compiler/rustc_parse/src/parser/nonterminal.rs @@ -1,5 +1,6 @@ use rustc_ast::ptr::P; use rustc_ast::token::{self, Nonterminal, NonterminalKind, Token}; +use rustc_ast::AstLike; use rustc_ast_pretty::pprust; use rustc_errors::PResult; use rustc_span::symbol::{kw, Ident}; @@ -102,7 +103,7 @@ impl<'a> Parser<'a> { // which requires having captured tokens available. Since we cannot determine // in advance whether or not a proc-macro will be (transitively) invoked, // we always capture tokens for any `Nonterminal` which needs them. - Ok(match kind { + let mut nt = match kind { NonterminalKind::Item => match self.parse_item(ForceCollect::Yes)? { Some(item) => token::NtItem(item), None => { @@ -169,7 +170,19 @@ impl<'a> Parser<'a> { return Err(self.struct_span_err(self.token.span, msg)); } } - }) + }; + + // If tokens are supported at all, they should be collected. + if matches!(nt.tokens_mut(), Some(None)) { + panic!( + "Missing tokens for nt {:?} at {:?}: {:?}", + nt, + nt.span(), + pprust::nonterminal_to_string(&nt) + ); + } + + Ok(nt) } } diff --git a/compiler/rustc_parse/src/parser/stmt.rs b/compiler/rustc_parse/src/parser/stmt.rs index b40eed8c5d118..4f0dcfeb5dae0 100644 --- a/compiler/rustc_parse/src/parser/stmt.rs +++ b/compiler/rustc_parse/src/parser/stmt.rs @@ -73,7 +73,11 @@ impl<'a> Parser<'a> { // or `auto trait` items. We aim to parse an arbitrary path `a::b` but not something // that starts like a path (1 token), but it fact not a path. // Also, we avoid stealing syntax from `parse_item_`. - self.parse_stmt_path_start(lo, attrs, force_collect)? + if force_collect == ForceCollect::Yes { + self.collect_tokens_no_attrs(|this| this.parse_stmt_path_start(lo, attrs)) + } else { + self.parse_stmt_path_start(lo, attrs) + }? } else if let Some(item) = self.parse_item_common(attrs.clone(), false, true, |_| true, force_collect)? { @@ -85,7 +89,13 @@ impl<'a> Parser<'a> { self.mk_stmt(lo, StmtKind::Empty) } else if self.token != token::CloseDelim(token::Brace) { // Remainder are line-expr stmts. - let e = self.parse_expr_res(Restrictions::STMT_EXPR, Some(attrs))?; + let e = if force_collect == ForceCollect::Yes { + self.collect_tokens_no_attrs(|this| { + this.parse_expr_res(Restrictions::STMT_EXPR, Some(attrs)) + }) + } else { + self.parse_expr_res(Restrictions::STMT_EXPR, Some(attrs)) + }?; self.mk_stmt(lo.to(e.span), StmtKind::Expr(e)) } else { self.error_outer_attrs(&attrs.take_for_recovery()); @@ -93,13 +103,8 @@ impl<'a> Parser<'a> { })) } - fn parse_stmt_path_start( - &mut self, - lo: Span, - attrs: AttrWrapper, - force_collect: ForceCollect, - ) -> PResult<'a, Stmt> { - let stmt = self.collect_tokens_trailing_token(attrs, force_collect, |this, attrs| { + fn parse_stmt_path_start(&mut self, lo: Span, attrs: AttrWrapper) -> PResult<'a, Stmt> { + let stmt = self.collect_tokens_trailing_token(attrs, ForceCollect::No, |this, attrs| { let path = this.parse_path(PathStyle::Expr)?; if this.eat(&token::Not) { diff --git a/src/test/ui/proc-macro/expr-stmt-nonterminal-tokens.rs b/src/test/ui/proc-macro/expr-stmt-nonterminal-tokens.rs new file mode 100644 index 0000000000000..d4067a3359271 --- /dev/null +++ b/src/test/ui/proc-macro/expr-stmt-nonterminal-tokens.rs @@ -0,0 +1,37 @@ +// check-pass +// aux-build:test-macros.rs + +#![feature(decl_macro)] +#![feature(stmt_expr_attributes)] + +#![no_std] // Don't load unnecessary hygiene information from std +extern crate std; + +#[macro_use] +extern crate test_macros; + +macro mac { + (expr $expr:expr) => { + #[derive(Print)] + enum E { + V = { let _ = $expr; 0 }, + } + }, + (stmt $stmt:stmt) => { + #[derive(Print)] + enum E { + V = { let _ = { $stmt }; 0 }, + } + }, +} + +const PATH: u8 = 2; + +fn main() { + mac!(expr #[allow(warnings)] 0); + mac!(stmt 0); + mac!(stmt {}); + mac!(stmt PATH); + mac!(stmt 0 + 1); + mac!(stmt PATH + 1); +} diff --git a/src/test/ui/proc-macro/expr-stmt-nonterminal-tokens.stdout b/src/test/ui/proc-macro/expr-stmt-nonterminal-tokens.stdout new file mode 100644 index 0000000000000..e37a483cb87bc --- /dev/null +++ b/src/test/ui/proc-macro/expr-stmt-nonterminal-tokens.stdout @@ -0,0 +1,540 @@ +PRINT-DERIVE INPUT (DISPLAY): enum E { V = { let _ = #[allow(warnings)] 0 ; 0 }, } +PRINT-DERIVE DEEP-RE-COLLECTED (DISPLAY): enum E { V = { let _ = #[allow(warnings)] #[allow(warnings)] 0 ; 0 }, } +PRINT-DERIVE INPUT (DEBUG): TokenStream [ + Ident { + ident: "enum", + span: #4 bytes(299..303), + }, + Ident { + ident: "E", + span: #4 bytes(304..305), + }, + Group { + delimiter: Brace, + stream: TokenStream [ + Ident { + ident: "V", + span: #4 bytes(320..321), + }, + Punct { + ch: '=', + spacing: Alone, + span: #4 bytes(322..323), + }, + Group { + delimiter: Brace, + stream: TokenStream [ + Ident { + ident: "let", + span: #4 bytes(326..329), + }, + Ident { + ident: "_", + span: #4 bytes(330..331), + }, + Punct { + ch: '=', + spacing: Alone, + span: #4 bytes(332..333), + }, + Group { + delimiter: None, + stream: TokenStream [ + Punct { + ch: '#', + spacing: Alone, + span: #0 bytes(541..542), + }, + Group { + delimiter: Bracket, + stream: TokenStream [ + Ident { + ident: "allow", + span: #0 bytes(543..548), + }, + Group { + delimiter: Parenthesis, + stream: TokenStream [ + Ident { + ident: "warnings", + span: #0 bytes(549..557), + }, + ], + span: #0 bytes(548..558), + }, + ], + span: #0 bytes(542..559), + }, + Punct { + ch: '#', + spacing: Alone, + span: #0 bytes(541..542), + }, + Group { + delimiter: Bracket, + stream: TokenStream [ + Ident { + ident: "allow", + span: #0 bytes(543..548), + }, + Group { + delimiter: Parenthesis, + stream: TokenStream [ + Ident { + ident: "warnings", + span: #0 bytes(549..557), + }, + ], + span: #0 bytes(548..558), + }, + ], + span: #0 bytes(542..559), + }, + Literal { + kind: Integer, + symbol: "0", + suffix: None, + span: #0 bytes(560..561), + }, + ], + span: #4 bytes(334..339), + }, + Punct { + ch: ';', + spacing: Alone, + span: #4 bytes(339..340), + }, + Literal { + kind: Integer, + symbol: "0", + suffix: None, + span: #4 bytes(341..342), + }, + ], + span: #4 bytes(324..344), + }, + Punct { + ch: ',', + spacing: Alone, + span: #4 bytes(344..345), + }, + ], + span: #4 bytes(306..355), + }, +] +PRINT-DERIVE INPUT (DISPLAY): enum E { V = { let _ = { 0; } ; 0 }, } +PRINT-DERIVE DEEP-RE-COLLECTED (DISPLAY): enum E { V = { let _ = { 0 } ; 0 }, } +PRINT-DERIVE INPUT (DEBUG): TokenStream [ + Ident { + ident: "enum", + span: #8 bytes(423..427), + }, + Ident { + ident: "E", + span: #8 bytes(428..429), + }, + Group { + delimiter: Brace, + stream: TokenStream [ + Ident { + ident: "V", + span: #8 bytes(444..445), + }, + Punct { + ch: '=', + spacing: Alone, + span: #8 bytes(446..447), + }, + Group { + delimiter: Brace, + stream: TokenStream [ + Ident { + ident: "let", + span: #8 bytes(450..453), + }, + Ident { + ident: "_", + span: #8 bytes(454..455), + }, + Punct { + ch: '=', + spacing: Alone, + span: #8 bytes(456..457), + }, + Group { + delimiter: Brace, + stream: TokenStream [ + Group { + delimiter: None, + stream: TokenStream [ + Literal { + kind: Integer, + symbol: "0", + suffix: None, + span: #0 bytes(578..579), + }, + ], + span: #8 bytes(460..465), + }, + ], + span: #8 bytes(458..467), + }, + Punct { + ch: ';', + spacing: Alone, + span: #8 bytes(467..468), + }, + Literal { + kind: Integer, + symbol: "0", + suffix: None, + span: #8 bytes(469..470), + }, + ], + span: #8 bytes(448..472), + }, + Punct { + ch: ',', + spacing: Alone, + span: #8 bytes(472..473), + }, + ], + span: #8 bytes(430..483), + }, +] +PRINT-DERIVE INPUT (DISPLAY): enum E { V = { let _ = { { } } ; 0 }, } +PRINT-DERIVE INPUT (DEBUG): TokenStream [ + Ident { + ident: "enum", + span: #12 bytes(423..427), + }, + Ident { + ident: "E", + span: #12 bytes(428..429), + }, + Group { + delimiter: Brace, + stream: TokenStream [ + Ident { + ident: "V", + span: #12 bytes(444..445), + }, + Punct { + ch: '=', + spacing: Alone, + span: #12 bytes(446..447), + }, + Group { + delimiter: Brace, + stream: TokenStream [ + Ident { + ident: "let", + span: #12 bytes(450..453), + }, + Ident { + ident: "_", + span: #12 bytes(454..455), + }, + Punct { + ch: '=', + spacing: Alone, + span: #12 bytes(456..457), + }, + Group { + delimiter: Brace, + stream: TokenStream [ + Group { + delimiter: None, + stream: TokenStream [ + Group { + delimiter: Brace, + stream: TokenStream [], + span: #0 bytes(596..598), + }, + ], + span: #12 bytes(460..465), + }, + ], + span: #12 bytes(458..467), + }, + Punct { + ch: ';', + spacing: Alone, + span: #12 bytes(467..468), + }, + Literal { + kind: Integer, + symbol: "0", + suffix: None, + span: #12 bytes(469..470), + }, + ], + span: #12 bytes(448..472), + }, + Punct { + ch: ',', + spacing: Alone, + span: #12 bytes(472..473), + }, + ], + span: #12 bytes(430..483), + }, +] +PRINT-DERIVE INPUT (DISPLAY): enum E { V = { let _ = { PATH; } ; 0 }, } +PRINT-DERIVE DEEP-RE-COLLECTED (DISPLAY): enum E { V = { let _ = { PATH } ; 0 }, } +PRINT-DERIVE INPUT (DEBUG): TokenStream [ + Ident { + ident: "enum", + span: #16 bytes(423..427), + }, + Ident { + ident: "E", + span: #16 bytes(428..429), + }, + Group { + delimiter: Brace, + stream: TokenStream [ + Ident { + ident: "V", + span: #16 bytes(444..445), + }, + Punct { + ch: '=', + spacing: Alone, + span: #16 bytes(446..447), + }, + Group { + delimiter: Brace, + stream: TokenStream [ + Ident { + ident: "let", + span: #16 bytes(450..453), + }, + Ident { + ident: "_", + span: #16 bytes(454..455), + }, + Punct { + ch: '=', + spacing: Alone, + span: #16 bytes(456..457), + }, + Group { + delimiter: Brace, + stream: TokenStream [ + Group { + delimiter: None, + stream: TokenStream [ + Ident { + ident: "PATH", + span: #0 bytes(615..619), + }, + ], + span: #16 bytes(460..465), + }, + ], + span: #16 bytes(458..467), + }, + Punct { + ch: ';', + spacing: Alone, + span: #16 bytes(467..468), + }, + Literal { + kind: Integer, + symbol: "0", + suffix: None, + span: #16 bytes(469..470), + }, + ], + span: #16 bytes(448..472), + }, + Punct { + ch: ',', + spacing: Alone, + span: #16 bytes(472..473), + }, + ], + span: #16 bytes(430..483), + }, +] +PRINT-DERIVE INPUT (DISPLAY): enum E { V = { let _ = { 0 + 1; } ; 0 }, } +PRINT-DERIVE DEEP-RE-COLLECTED (DISPLAY): enum E { V = { let _ = { 0 + 1 } ; 0 }, } +PRINT-DERIVE INPUT (DEBUG): TokenStream [ + Ident { + ident: "enum", + span: #20 bytes(423..427), + }, + Ident { + ident: "E", + span: #20 bytes(428..429), + }, + Group { + delimiter: Brace, + stream: TokenStream [ + Ident { + ident: "V", + span: #20 bytes(444..445), + }, + Punct { + ch: '=', + spacing: Alone, + span: #20 bytes(446..447), + }, + Group { + delimiter: Brace, + stream: TokenStream [ + Ident { + ident: "let", + span: #20 bytes(450..453), + }, + Ident { + ident: "_", + span: #20 bytes(454..455), + }, + Punct { + ch: '=', + spacing: Alone, + span: #20 bytes(456..457), + }, + Group { + delimiter: Brace, + stream: TokenStream [ + Group { + delimiter: None, + stream: TokenStream [ + Literal { + kind: Integer, + symbol: "0", + suffix: None, + span: #0 bytes(636..637), + }, + Punct { + ch: '+', + spacing: Alone, + span: #0 bytes(638..639), + }, + Literal { + kind: Integer, + symbol: "1", + suffix: None, + span: #0 bytes(640..641), + }, + ], + span: #20 bytes(460..465), + }, + ], + span: #20 bytes(458..467), + }, + Punct { + ch: ';', + spacing: Alone, + span: #20 bytes(467..468), + }, + Literal { + kind: Integer, + symbol: "0", + suffix: None, + span: #20 bytes(469..470), + }, + ], + span: #20 bytes(448..472), + }, + Punct { + ch: ',', + spacing: Alone, + span: #20 bytes(472..473), + }, + ], + span: #20 bytes(430..483), + }, +] +PRINT-DERIVE INPUT (DISPLAY): enum E { V = { let _ = { PATH + 1; } ; 0 }, } +PRINT-DERIVE DEEP-RE-COLLECTED (DISPLAY): enum E { V = { let _ = { PATH + 1 } ; 0 }, } +PRINT-DERIVE INPUT (DEBUG): TokenStream [ + Ident { + ident: "enum", + span: #24 bytes(423..427), + }, + Ident { + ident: "E", + span: #24 bytes(428..429), + }, + Group { + delimiter: Brace, + stream: TokenStream [ + Ident { + ident: "V", + span: #24 bytes(444..445), + }, + Punct { + ch: '=', + spacing: Alone, + span: #24 bytes(446..447), + }, + Group { + delimiter: Brace, + stream: TokenStream [ + Ident { + ident: "let", + span: #24 bytes(450..453), + }, + Ident { + ident: "_", + span: #24 bytes(454..455), + }, + Punct { + ch: '=', + spacing: Alone, + span: #24 bytes(456..457), + }, + Group { + delimiter: Brace, + stream: TokenStream [ + Group { + delimiter: None, + stream: TokenStream [ + Ident { + ident: "PATH", + span: #0 bytes(658..662), + }, + Punct { + ch: '+', + spacing: Alone, + span: #0 bytes(663..664), + }, + Literal { + kind: Integer, + symbol: "1", + suffix: None, + span: #0 bytes(665..666), + }, + ], + span: #24 bytes(460..465), + }, + ], + span: #24 bytes(458..467), + }, + Punct { + ch: ';', + spacing: Alone, + span: #24 bytes(467..468), + }, + Literal { + kind: Integer, + symbol: "0", + suffix: None, + span: #24 bytes(469..470), + }, + ], + span: #24 bytes(448..472), + }, + Punct { + ch: ',', + spacing: Alone, + span: #24 bytes(472..473), + }, + ], + span: #24 bytes(430..483), + }, +]