Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

EXPERIMENT: Recover on stmts/exprs at module level, suggesting to wrap in function #69445

Closed
wants to merge 2 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion src/librustc_expand/expand.rs
Original file line number Diff line number Diff line change
Expand Up @@ -892,7 +892,7 @@ pub fn parse_ast_fragment<'a>(
let mut stmts = SmallVec::new();
// Won't make progress on a `}`.
while this.token != token::Eof && this.token != token::CloseDelim(token::Brace) {
if let Some(stmt) = this.parse_full_stmt()? {
if let Some(stmt) = this.parse_full_stmt(false)? {
stmts.push(stmt);
}
}
Expand Down
51 changes: 41 additions & 10 deletions src/librustc_parse/parser/item.rs
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ impl<'a> Parser<'a> {

fn parse_item_(&mut self, req_name: ReqName) -> PResult<'a, Option<Item>> {
let attrs = self.parse_outer_attributes()?;
self.parse_item_common(attrs, true, false, req_name)
self.parse_item_common(attrs, true, false, req_name, true)
}

pub(super) fn parse_item_common(
Expand All @@ -39,6 +39,7 @@ impl<'a> Parser<'a> {
mac_allowed: bool,
attrs_allowed: bool,
req_name: ReqName,
mod_stmt: bool,
) -> PResult<'a, Option<Item>> {
maybe_whole!(self, NtItem, |item| {
let mut item = item;
Expand All @@ -49,9 +50,9 @@ impl<'a> Parser<'a> {

let mut unclosed_delims = vec![];
let (mut item, tokens) = self.collect_tokens(|this| {
let item = this.parse_item_common_(attrs, mac_allowed, attrs_allowed, req_name);
let i = this.parse_item_common_(attrs, mac_allowed, attrs_allowed, req_name, mod_stmt);
unclosed_delims.append(&mut this.unclosed_delims);
item
i
})?;
self.unclosed_delims.append(&mut unclosed_delims);

Expand Down Expand Up @@ -83,11 +84,13 @@ impl<'a> Parser<'a> {
mac_allowed: bool,
attrs_allowed: bool,
req_name: ReqName,
mod_stmt: bool,
) -> PResult<'a, Option<Item>> {
let lo = self.token.span;
let vis = self.parse_visibility(FollowedByType::No)?;
let mut def = self.parse_defaultness();
let kind = self.parse_item_kind(&mut attrs, mac_allowed, lo, &vis, &mut def, req_name)?;
let kind =
self.parse_item_kind(&mut attrs, mac_allowed, lo, &vis, &mut def, req_name, mod_stmt)?;
if let Some((ident, kind)) = kind {
self.error_on_unconsumed_default(def, &kind);
let span = lo.to(self.prev_span);
Expand Down Expand Up @@ -148,6 +151,7 @@ impl<'a> Parser<'a> {
vis: &Visibility,
def: &mut Defaultness,
req_name: ReqName,
mod_stmt: bool,
) -> PResult<'a, Option<ItemInfo>> {
let mut def = || mem::replace(def, Defaultness::Final);

Expand Down Expand Up @@ -212,9 +216,13 @@ impl<'a> Parser<'a> {
} else if vis.node.is_pub() && self.isnt_macro_invocation() {
self.recover_missing_kw_before_item()?;
return Ok(None);
} else if macros_allowed && self.token.is_path_start() {
} else if let Some(kind) = if macros_allowed && self.token.is_path_start() {
self.parse_item_macro(vis, mod_stmt)?
} else {
None
} {
// MACRO INVOCATION ITEM
(Ident::invalid(), ItemKind::Mac(self.parse_item_macro(vis)?))
(Ident::invalid(), ItemKind::Mac(kind))
} else {
Comment on lines +221 to 226
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This doesn't seem right (} else { None } { (...) } else {)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I would have preferred to write this with let-chains but they ain't implemented yet sadly.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@estebank we could do something like:

Suggested change
} else {
} else if self.maybe_consume_incorrect_semicolon(&[]) {
let lo = self.token.span;
self.parse_item_kind(attrs, macros_allowed, lo, vis, def, req_name, mod_stmt)?
} else {

return Ok(None);
};
Expand Down Expand Up @@ -333,13 +341,36 @@ impl<'a> Parser<'a> {
}

/// Parses an item macro, e.g., `item!();`.
fn parse_item_macro(&mut self, vis: &Visibility) -> PResult<'a, Mac> {
let path = self.parse_path(PathStyle::Mod)?; // `foo::bar`
self.expect(&token::Not)?; // `!`
fn parse_item_macro(&mut self, vis: &Visibility, mod_stmt: bool) -> PResult<'a, Option<Mac>> {
let parse_prefix = |p: &mut Self| -> PResult<'a, ast::Path> {
let path = p.parse_path(PathStyle::Mod)?; // `foo::bar`
p.expect(&token::Not)?; // `!`
Ok(path)
};
let path = if mod_stmt {
// We're in statement-as-module-item recovery mode.
// To avoid "stealing" syntax from e.g. `x.f()` as a module-level statement,
// we backtrack if we failed to parse `$path!`; after we have, we commit firmly.
// This is only done when `mod_stmt` holds to avoid backtracking inside functions.
let snapshot = self.clone();
match parse_prefix(self) {
Ok(path) => path,
Err(mut err) => {
// Assert that this is only for diagnostics!
// This is a safeguard against breaking LL(k) accidentally in the spec,
// assuming no one has gated the syntax with something like `#[cfg(FALSE)]`.
err.delay_as_bug();
*self = snapshot;
return Ok(None);
Comment on lines +351 to +364
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This use of backtracking bears some discussion. It only happens outside of function bodies to avoid regressing perf or accepting more syntax. In the case of module level, if we hit EOF we will not even attempt parsing an item so it shouldn't have a perf implication that way?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

An alternative to this might be bounded look-ahead for some fixed k, but that is ostensibly less good for diagnostics and trickier to encode correctly. Path parsing is also rarely long and $path! is a fairly unique sequence of tokens.

Copy link
Contributor

@estebank estebank Feb 25, 2020

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I would add another closure or fn can_begin_macro that checks one token for is_ident and two tokens for either :: or !, and only clone if neither are true. That would keep the cost of cloning down for the happy path at the cost of marginally worse diagnostics.

}
}
} else {
parse_prefix(self)?
};
let args = self.parse_mac_args()?; // `( .. )` or `[ .. ]` (followed by `;`), or `{ .. }`.
self.eat_semi_for_macro_if_needed(&args);
self.complain_if_pub_macro(vis, false);
Ok(Mac { path, args, prior_type_ascription: self.last_type_ascription })
Ok(Some(Mac { path, args, prior_type_ascription: self.last_type_ascription }))
}

/// Recover if we parsed attributes and expected an item but there was none.
Expand Down
148 changes: 143 additions & 5 deletions src/librustc_parse/parser/module.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,15 @@ use super::Parser;

use crate::{new_sub_parser_from_file, DirectoryOwnership};

use rustc_errors::PResult;
use rustc_span::source_map::{FileName, SourceMap, Span, DUMMY_SP};
use rustc_ast_pretty::pprust;
use rustc_errors::{Applicability, PResult};
use rustc_span::source_map::{respan, FileName, MultiSpan, SourceMap, Span, DUMMY_SP};
use rustc_span::symbol::sym;
use syntax::ast::{self, Attribute, Crate, Ident, ItemKind, Mod};
use syntax::attr;
use syntax::ptr::P;
use syntax::token::{self, TokenKind};
use syntax::visit::Visitor;

use std::path::{self, Path, PathBuf};

Expand Down Expand Up @@ -75,9 +78,12 @@ impl<'a> Parser<'a> {
/// Given a termination token, parses all of the items in a module.
fn parse_mod_items(&mut self, term: &TokenKind, inner_lo: Span) -> PResult<'a, Mod> {
let mut items = vec![];
while let Some(item) = self.parse_item()? {
items.push(item);
self.maybe_consume_incorrect_semicolon(&items);
let mut stuck = false;
while let Some(res) = self.parse_item_in_mod(term, &mut stuck)? {
if let Some(item) = res {
items.push(item);
self.maybe_consume_incorrect_semicolon(&items);
}
}

if !self.eat(term) {
Expand All @@ -95,6 +101,138 @@ impl<'a> Parser<'a> {
Ok(Mod { inner: inner_lo.to(hi), items, inline: true })
}

fn parse_item_in_mod(
&mut self,
term: &TokenKind,
stuck: &mut bool,
) -> PResult<'a, Option<Option<P<ast::Item>>>> {
match self.parse_item()? {
// We just made progress and we might have statements following this item.
i @ Some(_) => {
*stuck = false;
Ok(Some(i))
}
// No progress and the previous attempt at statements failed, so terminate the loop.
None if *stuck => Ok(None),
None => Ok(self.recover_stmts_as_item(term, stuck)?.then_some(None)),
}
}

/// Parse a contiguous list of statements until we reach the terminating token or EOF.
/// When any statements were parsed, perform recovery and suggest wrapping the statements
/// inside a function. If `stuck` becomes `true`, then this method should not be called
/// unless we have advanced the cursor.
fn recover_stmts_as_item(&mut self, term: &TokenKind, stuck: &mut bool) -> PResult<'a, bool> {
let lo = self.token.span;
let mut stmts = vec![];
while ![term, &token::Eof].contains(&&self.token.kind) {
let old_expected = std::mem::take(&mut self.expected_tokens);
let snapshot = self.clone();
let stmt = self.parse_full_stmt(true);
self.expected_tokens = old_expected; // Restore expected tokens to before recovery.
match stmt {
Ok(None) => break,
Ok(Some(stmt)) => stmts.push(stmt),
Err(mut err) => {
// We couldn't parse as a statement. Rewind to the last one we could for.
// Also notify the caller that we made no progress, meaning that the method
// should not be called again to avoid non-termination.
err.cancel();
*self = snapshot;
*stuck = true;
break;
}
}
}

let recovered = !stmts.is_empty();
if recovered {
// We parsed some statements and have recovered, so let's emit an error.
self.error_stmts_as_item_suggest_fn(lo, stmts);
}
Ok(recovered)
}

fn error_stmts_as_item_suggest_fn(&self, lo: Span, stmts: Vec<ast::Stmt>) {
use syntax::ast::*;

let span = lo.to(self.prev_span);
let spans: MultiSpan = match &*stmts {
[] | [_] => span.into(),
[x, .., y] => vec![x.span, y.span].into(),
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm guessing that you didn't like the multiline span output for these?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not sure what you mean; in the first arm the span spans as long as [first.span] would and in the case of [] it's unreachable (but I didn't unreachable!() because it's more code).

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I mean that the alternative was a single x.span.until(y.span).

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why do we want .until(...) though? That would leave out e.g. the end of the last statement? I would expect x.to(y) as the alternative, but that's equivalent to span.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Switching to just span in all cases then. :)

};

// Perform coarse grained inference about returns.
// We use this to tell whether `main` is an acceptable name
// and if `-> _` or `-> Result<_, _>` should be used instead of defaulting to unit.
#[derive(Default)]
struct RetInfer(bool, bool, bool);
let RetInfer(has_ret_unit, has_ret_expr, has_try_expr) = {
impl Visitor<'_> for RetInfer {
fn visit_expr_post(&mut self, expr: &Expr) {
match expr.kind {
ExprKind::Ret(None) => self.0 = true, // `return`
ExprKind::Ret(Some(_)) => self.1 = true, // `return $expr`
ExprKind::Try(_) => self.2 = true, // `expr?`
_ => {}
}
}
}
let mut visitor = RetInfer::default();
for stmt in &stmts {
visitor.visit_stmt(stmt);
}
if let StmtKind::Expr(_) = &stmts.last().unwrap().kind {
visitor.1 = true; // The tail expression.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The tail expression will be somewhat arbitrary in real code. Don't know if this part is worth it or it will be noisier than we'd wish.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Up to a point, after we've shown the user how to write a function, if they forget to add a return type we'll guide them in the right direction in subsequent errors.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

My thinking is that worst case you get -> _ and you write that, and the compiler tells you to write -> () cause it's inferring the return type.

}
visitor
};

// For the function name, use `main` if we are in `main.rs`, and `my_function` otherwise.
let use_main = (has_ret_unit || has_try_expr)
&& self.directory.path.file_stem() == Some(std::ffi::OsStr::new("main"));
let ident = Ident::from_str_and_span(if use_main { "main" } else { "my_function" }, span);

// Construct the return type; either default, `-> _`, or `-> Result<_, _>`.
let output = match (has_ret_unit, has_ret_expr, has_try_expr) {
// `-> ()`; We either had `return;`, so return type is unit, or nothing was returned.
(true, _, _) | (false, false, false) => FnRetTy::Default(span),
// `-> Result<_, _>`; We had `?` somewhere so `-> Result<_, _>` is a good bet.
(_, _, true) => {
let arg = GenericArg::Type(self.mk_ty(span, TyKind::Infer));
let args = [arg.clone(), arg].to_vec();
let args = AngleBracketedArgs { span, constraints: vec![], args };
let mut path = Path::from_ident(Ident::from_str_and_span("Result", span));
path.segments[0].args = Some(P(GenericArgs::AngleBracketed(args)));
FnRetTy::Ty(self.mk_ty(span, TyKind::Path(None, path)))
}
// `-> _`; We had `return $expr;` so it's probably not `()` as return type.
(_, true, _) => FnRetTy::Ty(self.mk_ty(span, TyKind::Infer)),
};

// Finalize the AST for the function item: `fn $ident() $output { $stmts }`.
let sig = FnSig { header: FnHeader::default(), decl: P(FnDecl { inputs: vec![], output }) };
let body = self.mk_block(stmts, BlockCheckMode::Default, span);
let kind = ItemKind::Fn(Defaultness::Final, sig, Generics::default(), Some(body));
let vis = respan(span, VisibilityKind::Inherited);
let item = Item { span, ident, vis, kind, attrs: vec![], id: DUMMY_NODE_ID, tokens: None };

// Emit the error with a suggestion to wrap the statements in the function.
let mut err = self.struct_span_err(spans, "statements cannot reside in modules");
err.span_suggestion_verbose(
span,
"consider moving the statements into a function",
pprust::item_to_string(&item),
Applicability::HasPlaceholders,
);
err.note("the program entry point starts in `fn main() { ... }`, defined in `main.rs`");
err.note(
"for more on functions and how to structure your program, \
see https://doc.rust-lang.org/book/ch03-03-how-functions-work.html",
);
err.emit();
}

fn submod_path(
&mut self,
id: ast::Ident,
Expand Down
26 changes: 16 additions & 10 deletions src/librustc_parse/parser/stmt.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,14 +22,14 @@ impl<'a> Parser<'a> {
/// Parses a statement. This stops just before trailing semicolons on everything but items.
/// e.g., a `StmtKind::Semi` parses to a `StmtKind::Expr`, leaving the trailing `;` unconsumed.
pub fn parse_stmt(&mut self) -> PResult<'a, Option<Stmt>> {
Ok(self.parse_stmt_without_recovery().unwrap_or_else(|mut e| {
Ok(self.parse_stmt_without_recovery(false).unwrap_or_else(|mut e| {
e.emit();
self.recover_stmt_(SemiColonMode::Break, BlockMode::Ignore);
None
}))
}

fn parse_stmt_without_recovery(&mut self) -> PResult<'a, Option<Stmt>> {
fn parse_stmt_without_recovery(&mut self, module_stmt: bool) -> PResult<'a, Option<Stmt>> {
maybe_whole!(self, NtStmt, |x| Some(x));

let attrs = self.parse_outer_attributes()?;
Expand All @@ -56,7 +56,10 @@ impl<'a> Parser<'a> {
// that starts like a path (1 token), but it fact not a path.
// Also, we avoid stealing syntax from `parse_item_`.
self.parse_stmt_path_start(lo, attrs)?
} else if let Some(item) = self.parse_stmt_item(attrs.clone())? {
} else if let Some(item) =
// When parsing the statement as a module (recovery), avoid parsing items.
if module_stmt { None } else { self.parse_stmt_item(attrs.clone())? }
{
// FIXME: Bad copy of attrs
self.mk_stmt(lo.to(item.span), StmtKind::Item(P(item)))
} else if self.token == token::Semi {
Expand Down Expand Up @@ -89,7 +92,7 @@ impl<'a> Parser<'a> {

fn parse_stmt_item(&mut self, attrs: Vec<Attribute>) -> PResult<'a, Option<ast::Item>> {
let old = mem::replace(&mut self.directory.ownership, DirectoryOwnership::UnownedViaBlock);
let item = self.parse_item_common(attrs.clone(), false, true, |_| true)?;
let item = self.parse_item_common(attrs.clone(), false, true, |_| true, false)?;
self.directory.ownership = old;
Ok(item)
}
Expand Down Expand Up @@ -275,7 +278,7 @@ impl<'a> Parser<'a> {
// bar;
//
// which is valid in other languages, but not Rust.
match self.parse_stmt_without_recovery() {
match self.parse_stmt_without_recovery(true) {
Ok(Some(stmt)) => {
if self.look_ahead(1, |t| t == &token::OpenDelim(token::Brace))
|| do_not_suggest_help
Expand Down Expand Up @@ -334,7 +337,7 @@ impl<'a> Parser<'a> {
if self.token == token::Eof {
break;
}
let stmt = match self.parse_full_stmt() {
let stmt = match self.parse_full_stmt(false) {
Err(mut err) => {
self.maybe_annotate_with_ascription(&mut err, false);
err.emit();
Expand All @@ -354,20 +357,23 @@ impl<'a> Parser<'a> {
}

/// Parses a statement, including the trailing semicolon.
pub fn parse_full_stmt(&mut self) -> PResult<'a, Option<Stmt>> {
pub fn parse_full_stmt(&mut self, module_stmt: bool) -> PResult<'a, Option<Stmt>> {
// Skip looking for a trailing semicolon when we have an interpolated statement.
maybe_whole!(self, NtStmt, |x| Some(x));

let mut stmt = match self.parse_stmt_without_recovery()? {
let mut stmt = match self.parse_stmt_without_recovery(module_stmt)? {
Some(stmt) => stmt,
None => return Ok(None),
};

let mut eat_semi = true;
match stmt.kind {
// Expression without semicolon.
StmtKind::Expr(ref expr)
if self.token != token::Eof && classify::expr_requires_semi_to_be_stmt(expr) =>
// Expression without semicolon.
if self.token != token::Eof
&& classify::expr_requires_semi_to_be_stmt(expr)
// Do not error here if in `module_stmt` recovery mode.
&& !module_stmt =>
{
// Just check for errors and recover; do not eat semicolon yet.
if let Err(mut e) =
Expand Down
Loading