rust-lang · Centril · Feb 23, 2020 · Feb 24, 2020 · estebank · Feb 25, 2020
diff --git a/src/librustc_expand/expand.rs b/src/librustc_expand/expand.rs
@@ -892,7 +892,7 @@ pub fn parse_ast_fragment<'a>(
             let mut stmts = SmallVec::new();
             // Won't make progress on a `}`.
             while this.token != token::Eof && this.token != token::CloseDelim(token::Brace) {
-                if let Some(stmt) = this.parse_full_stmt()? {
+                if let Some(stmt) = this.parse_full_stmt(false)? {
                     stmts.push(stmt);
                 }
             }

diff --git a/src/librustc_parse/parser/item.rs b/src/librustc_parse/parser/item.rs
@@ -30,7 +30,7 @@ impl<'a> Parser<'a> {
 
     fn parse_item_(&mut self, req_name: ReqName) -> PResult<'a, Option<Item>> {
         let attrs = self.parse_outer_attributes()?;
-        self.parse_item_common(attrs, true, false, req_name)
+        self.parse_item_common(attrs, true, false, req_name, true)
     }
 
     pub(super) fn parse_item_common(
@@ -39,6 +39,7 @@ impl<'a> Parser<'a> {
         mac_allowed: bool,
         attrs_allowed: bool,
         req_name: ReqName,
+        mod_stmt: bool,
     ) -> PResult<'a, Option<Item>> {
         maybe_whole!(self, NtItem, |item| {
             let mut item = item;
@@ -49,9 +50,9 @@ impl<'a> Parser<'a> {
 
         let mut unclosed_delims = vec![];
         let (mut item, tokens) = self.collect_tokens(|this| {
-            let item = this.parse_item_common_(attrs, mac_allowed, attrs_allowed, req_name);
+            let i = this.parse_item_common_(attrs, mac_allowed, attrs_allowed, req_name, mod_stmt);
             unclosed_delims.append(&mut this.unclosed_delims);
-            item
+            i
         })?;
         self.unclosed_delims.append(&mut unclosed_delims);
 
@@ -83,11 +84,13 @@ impl<'a> Parser<'a> {
         mac_allowed: bool,
         attrs_allowed: bool,
         req_name: ReqName,
+        mod_stmt: bool,
     ) -> PResult<'a, Option<Item>> {
         let lo = self.token.span;
         let vis = self.parse_visibility(FollowedByType::No)?;
         let mut def = self.parse_defaultness();
-        let kind = self.parse_item_kind(&mut attrs, mac_allowed, lo, &vis, &mut def, req_name)?;
+        let kind =
+            self.parse_item_kind(&mut attrs, mac_allowed, lo, &vis, &mut def, req_name, mod_stmt)?;
         if let Some((ident, kind)) = kind {
             self.error_on_unconsumed_default(def, &kind);
             let span = lo.to(self.prev_span);
@@ -148,6 +151,7 @@ impl<'a> Parser<'a> {
         vis: &Visibility,
         def: &mut Defaultness,
         req_name: ReqName,
+        mod_stmt: bool,
     ) -> PResult<'a, Option<ItemInfo>> {
         let mut def = || mem::replace(def, Defaultness::Final);
 
@@ -212,9 +216,13 @@ impl<'a> Parser<'a> {
         } else if vis.node.is_pub() && self.isnt_macro_invocation() {
             self.recover_missing_kw_before_item()?;
             return Ok(None);
-        } else if macros_allowed && self.token.is_path_start() {
+        } else if let Some(kind) = if macros_allowed && self.token.is_path_start() {
+            self.parse_item_macro(vis, mod_stmt)?
+        } else {
+            None
+        } {
             // MACRO INVOCATION ITEM
-            (Ident::invalid(), ItemKind::Mac(self.parse_item_macro(vis)?))
+            (Ident::invalid(), ItemKind::Mac(kind))
         } else {
-        } else {
+        } else if self.maybe_consume_incorrect_semicolon(&[]) {
+            let lo = self.token.span;
+            self.parse_item_kind(attrs, macros_allowed, lo, vis, def, req_name, mod_stmt)?
+        } else {
-        } else {
+        } else if self.maybe_consume_incorrect_semicolon(&[]) {
+            let lo = self.token.span;
+            self.parse_item_kind(attrs, macros_allowed, lo, vis, def, req_name, mod_stmt)?
+        } else {
             return Ok(None);
         };
@@ -333,13 +341,36 @@ impl<'a> Parser<'a> {
     }
 
     /// Parses an item macro, e.g., `item!();`.
-    fn parse_item_macro(&mut self, vis: &Visibility) -> PResult<'a, Mac> {
-        let path = self.parse_path(PathStyle::Mod)?; // `foo::bar`
-        self.expect(&token::Not)?; // `!`
+    fn parse_item_macro(&mut self, vis: &Visibility, mod_stmt: bool) -> PResult<'a, Option<Mac>> {
+        let parse_prefix = |p: &mut Self| -> PResult<'a, ast::Path> {
+            let path = p.parse_path(PathStyle::Mod)?; // `foo::bar`
+            p.expect(&token::Not)?; // `!`
+            Ok(path)
+        };
+        let path = if mod_stmt {
+            // We're in statement-as-module-item recovery mode.
+            // To avoid "stealing" syntax from e.g. `x.f()` as a module-level statement,
+            // we backtrack if we failed to parse `$path!`; after we have, we commit firmly.
+            // This is only done when `mod_stmt` holds to avoid backtracking inside functions.
+            let snapshot = self.clone();
+            match parse_prefix(self) {
+                Ok(path) => path,
+                Err(mut err) => {
+                    // Assert that this is only for diagnostics!
+                    // This is a safeguard against breaking LL(k) accidentally in the spec,
+                    // assuming no one has gated the syntax with something like `#[cfg(FALSE)]`.
+                    err.delay_as_bug();
+                    *self = snapshot;
+                    return Ok(None);
+                }
+            }
+        } else {
+            parse_prefix(self)?
+        };
         let args = self.parse_mac_args()?; // `( .. )` or `[ .. ]` (followed by `;`), or `{ .. }`.
         self.eat_semi_for_macro_if_needed(&args);
         self.complain_if_pub_macro(vis, false);
-        Ok(Mac { path, args, prior_type_ascription: self.last_type_ascription })
+        Ok(Some(Mac { path, args, prior_type_ascription: self.last_type_ascription }))
     }
 
     /// Recover if we parsed attributes and expected an item but there was none.

diff --git a/src/librustc_parse/parser/module.rs b/src/librustc_parse/parser/module.rs
@@ -4,12 +4,15 @@ use super::Parser;
 
 use crate::{new_sub_parser_from_file, DirectoryOwnership};
 
-use rustc_errors::PResult;
-use rustc_span::source_map::{FileName, SourceMap, Span, DUMMY_SP};
+use rustc_ast_pretty::pprust;
+use rustc_errors::{Applicability, PResult};
+use rustc_span::source_map::{respan, FileName, MultiSpan, SourceMap, Span, DUMMY_SP};
 use rustc_span::symbol::sym;
 use syntax::ast::{self, Attribute, Crate, Ident, ItemKind, Mod};
 use syntax::attr;
+use syntax::ptr::P;
 use syntax::token::{self, TokenKind};
+use syntax::visit::Visitor;
 
 use std::path::{self, Path, PathBuf};
 
@@ -75,9 +78,12 @@ impl<'a> Parser<'a> {
     /// Given a termination token, parses all of the items in a module.
     fn parse_mod_items(&mut self, term: &TokenKind, inner_lo: Span) -> PResult<'a, Mod> {
         let mut items = vec![];
-        while let Some(item) = self.parse_item()? {
-            items.push(item);
-            self.maybe_consume_incorrect_semicolon(&items);
+        let mut stuck = false;
+        while let Some(res) = self.parse_item_in_mod(term, &mut stuck)? {
+            if let Some(item) = res {
+                items.push(item);
+                self.maybe_consume_incorrect_semicolon(&items);
+            }
         }
 
         if !self.eat(term) {
@@ -95,6 +101,138 @@ impl<'a> Parser<'a> {
         Ok(Mod { inner: inner_lo.to(hi), items, inline: true })
     }
 
+    fn parse_item_in_mod(
+        &mut self,
+        term: &TokenKind,
+        stuck: &mut bool,
+    ) -> PResult<'a, Option<Option<P<ast::Item>>>> {
+        match self.parse_item()? {
+            // We just made progress and we might have statements following this item.
+            i @ Some(_) => {
+                *stuck = false;
+                Ok(Some(i))
+            }
+            // No progress and the previous attempt at statements failed, so terminate the loop.
+            None if *stuck => Ok(None),
+            None => Ok(self.recover_stmts_as_item(term, stuck)?.then_some(None)),
+        }
+    }
+
+    /// Parse a contiguous list of statements until we reach the terminating token or EOF.
+    /// When any statements were parsed, perform recovery and suggest wrapping the statements
+    /// inside a function. If `stuck` becomes `true`, then this method should not be called
+    /// unless we have advanced the cursor.
+    fn recover_stmts_as_item(&mut self, term: &TokenKind, stuck: &mut bool) -> PResult<'a, bool> {
+        let lo = self.token.span;
+        let mut stmts = vec![];
+        while ![term, &token::Eof].contains(&&self.token.kind) {
+            let old_expected = std::mem::take(&mut self.expected_tokens);
+            let snapshot = self.clone();
+            let stmt = self.parse_full_stmt(true);
+            self.expected_tokens = old_expected; // Restore expected tokens to before recovery.
+            match stmt {
+                Ok(None) => break,
+                Ok(Some(stmt)) => stmts.push(stmt),
+                Err(mut err) => {
+                    // We couldn't parse as a statement. Rewind to the last one we could for.
+                    // Also notify the caller that we made no progress, meaning that the method
+                    // should not be called again to avoid non-termination.
+                    err.cancel();
+                    *self = snapshot;
+                    *stuck = true;
+                    break;
+                }
+            }
+        }
+
+        let recovered = !stmts.is_empty();
+        if recovered {
+            // We parsed some statements and have recovered, so let's emit an error.
+            self.error_stmts_as_item_suggest_fn(lo, stmts);
+        }
+        Ok(recovered)
+    }
+
+    fn error_stmts_as_item_suggest_fn(&self, lo: Span, stmts: Vec<ast::Stmt>) {
+        use syntax::ast::*;
+
+        let span = lo.to(self.prev_span);
+        let spans: MultiSpan = match &*stmts {
+            [] | [_] => span.into(),
+            [x, .., y] => vec![x.span, y.span].into(),
+        };
+
+        // Perform coarse grained inference about returns.
+        // We use this to tell whether `main` is an acceptable name
+        // and if `-> _` or `-> Result<_, _>` should be used instead of defaulting to unit.
+        #[derive(Default)]
+        struct RetInfer(bool, bool, bool);
+        let RetInfer(has_ret_unit, has_ret_expr, has_try_expr) = {
+            impl Visitor<'_> for RetInfer {
+                fn visit_expr_post(&mut self, expr: &Expr) {
+                    match expr.kind {
+                        ExprKind::Ret(None) => self.0 = true,    // `return`
+                        ExprKind::Ret(Some(_)) => self.1 = true, // `return $expr`
+                        ExprKind::Try(_) => self.2 = true,       // `expr?`
+                        _ => {}
+                    }
+                }
+            }
+            let mut visitor = RetInfer::default();
+            for stmt in &stmts {
+                visitor.visit_stmt(stmt);
+            }
+            if let StmtKind::Expr(_) = &stmts.last().unwrap().kind {
+                visitor.1 = true; // The tail expression.
+            }
+            visitor
+        };
+
+        // For the function name, use `main` if we are in `main.rs`, and `my_function` otherwise.
+        let use_main = (has_ret_unit || has_try_expr)
+            && self.directory.path.file_stem() == Some(std::ffi::OsStr::new("main"));
+        let ident = Ident::from_str_and_span(if use_main { "main" } else { "my_function" }, span);
+
+        // Construct the return type; either default, `-> _`, or `-> Result<_, _>`.
+        let output = match (has_ret_unit, has_ret_expr, has_try_expr) {
+            // `-> ()`; We either had `return;`, so return type is unit, or nothing was returned.
+            (true, _, _) | (false, false, false) => FnRetTy::Default(span),
+            // `-> Result<_, _>`; We had `?` somewhere so `-> Result<_, _>` is a good bet.
+            (_, _, true) => {
+                let arg = GenericArg::Type(self.mk_ty(span, TyKind::Infer));
+                let args = [arg.clone(), arg].to_vec();
+                let args = AngleBracketedArgs { span, constraints: vec![], args };
+                let mut path = Path::from_ident(Ident::from_str_and_span("Result", span));
+                path.segments[0].args = Some(P(GenericArgs::AngleBracketed(args)));
+                FnRetTy::Ty(self.mk_ty(span, TyKind::Path(None, path)))
+            }
+            // `-> _`; We had `return $expr;` so it's probably not `()` as return type.
+            (_, true, _) => FnRetTy::Ty(self.mk_ty(span, TyKind::Infer)),
+        };
+
+        // Finalize the AST for the function item: `fn $ident() $output { $stmts }`.
+        let sig = FnSig { header: FnHeader::default(), decl: P(FnDecl { inputs: vec![], output }) };
+        let body = self.mk_block(stmts, BlockCheckMode::Default, span);
+        let kind = ItemKind::Fn(Defaultness::Final, sig, Generics::default(), Some(body));
+        let vis = respan(span, VisibilityKind::Inherited);
+        let item = Item { span, ident, vis, kind, attrs: vec![], id: DUMMY_NODE_ID, tokens: None };
+
+        // Emit the error with a suggestion to wrap the statements in the function.
+        let mut err = self.struct_span_err(spans, "statements cannot reside in modules");
+        err.span_suggestion_verbose(
+            span,
+            "consider moving the statements into a function",
+            pprust::item_to_string(&item),
+            Applicability::HasPlaceholders,
+        );
+        err.note("the program entry point starts in `fn main() { ... }`, defined in `main.rs`");
+        err.note(
+            "for more on functions and how to structure your program, \
+                see https://doc.rust-lang.org/book/ch03-03-how-functions-work.html",
+        );
+        err.emit();
+    }
+
     fn submod_path(
         &mut self,
         id: ast::Ident,

diff --git a/src/librustc_parse/parser/stmt.rs b/src/librustc_parse/parser/stmt.rs
@@ -22,14 +22,14 @@ impl<'a> Parser<'a> {
     /// Parses a statement. This stops just before trailing semicolons on everything but items.
     /// e.g., a `StmtKind::Semi` parses to a `StmtKind::Expr`, leaving the trailing `;` unconsumed.
     pub fn parse_stmt(&mut self) -> PResult<'a, Option<Stmt>> {
-        Ok(self.parse_stmt_without_recovery().unwrap_or_else(|mut e| {
+        Ok(self.parse_stmt_without_recovery(false).unwrap_or_else(|mut e| {
             e.emit();
             self.recover_stmt_(SemiColonMode::Break, BlockMode::Ignore);
             None
         }))
     }
 
-    fn parse_stmt_without_recovery(&mut self) -> PResult<'a, Option<Stmt>> {
+    fn parse_stmt_without_recovery(&mut self, module_stmt: bool) -> PResult<'a, Option<Stmt>> {
         maybe_whole!(self, NtStmt, |x| Some(x));
 
         let attrs = self.parse_outer_attributes()?;
@@ -56,7 +56,10 @@ impl<'a> Parser<'a> {
             // that starts like a path (1 token), but it fact not a path.
             // Also, we avoid stealing syntax from `parse_item_`.
             self.parse_stmt_path_start(lo, attrs)?
-        } else if let Some(item) = self.parse_stmt_item(attrs.clone())? {
+        } else if let Some(item) =
+            // When parsing the statement as a module (recovery), avoid parsing items.
+            if module_stmt { None } else { self.parse_stmt_item(attrs.clone())? }
+        {
             // FIXME: Bad copy of attrs
             self.mk_stmt(lo.to(item.span), StmtKind::Item(P(item)))
         } else if self.token == token::Semi {
@@ -89,7 +92,7 @@ impl<'a> Parser<'a> {
 
     fn parse_stmt_item(&mut self, attrs: Vec<Attribute>) -> PResult<'a, Option<ast::Item>> {
         let old = mem::replace(&mut self.directory.ownership, DirectoryOwnership::UnownedViaBlock);
-        let item = self.parse_item_common(attrs.clone(), false, true, |_| true)?;
+        let item = self.parse_item_common(attrs.clone(), false, true, |_| true, false)?;
         self.directory.ownership = old;
         Ok(item)
     }
@@ -275,7 +278,7 @@ impl<'a> Parser<'a> {
         //      bar;
         //
         // which is valid in other languages, but not Rust.
-        match self.parse_stmt_without_recovery() {
+        match self.parse_stmt_without_recovery(true) {
             Ok(Some(stmt)) => {
                 if self.look_ahead(1, |t| t == &token::OpenDelim(token::Brace))
                     || do_not_suggest_help
@@ -334,7 +337,7 @@ impl<'a> Parser<'a> {
             if self.token == token::Eof {
                 break;
             }
-            let stmt = match self.parse_full_stmt() {
+            let stmt = match self.parse_full_stmt(false) {
                 Err(mut err) => {
                     self.maybe_annotate_with_ascription(&mut err, false);
                     err.emit();
@@ -354,20 +357,23 @@ impl<'a> Parser<'a> {
     }
 
     /// Parses a statement, including the trailing semicolon.
-    pub fn parse_full_stmt(&mut self) -> PResult<'a, Option<Stmt>> {
+    pub fn parse_full_stmt(&mut self, module_stmt: bool) -> PResult<'a, Option<Stmt>> {
         // Skip looking for a trailing semicolon when we have an interpolated statement.
         maybe_whole!(self, NtStmt, |x| Some(x));
 
-        let mut stmt = match self.parse_stmt_without_recovery()? {
+        let mut stmt = match self.parse_stmt_without_recovery(module_stmt)? {
             Some(stmt) => stmt,
             None => return Ok(None),
         };
 
         let mut eat_semi = true;
         match stmt.kind {
-            // Expression without semicolon.
             StmtKind::Expr(ref expr)
-                if self.token != token::Eof && classify::expr_requires_semi_to_be_stmt(expr) =>
+                // Expression without semicolon.
+                if self.token != token::Eof
+                    && classify::expr_requires_semi_to_be_stmt(expr)
+                    // Do not error here if in `module_stmt` recovery mode.
+                    && !module_stmt =>
             {
                 // Just check for errors and recover; do not eat semicolon yet.
                 if let Err(mut e) =