Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Recover wrong-cased keywords that start items #99918

Merged
merged 3 commits into from
Nov 11, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions compiler/rustc_ast/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ extern crate rustc_macros;
extern crate tracing;

pub mod util {
pub mod case;
pub mod classify;
pub mod comments;
pub mod literal;
Expand Down
10 changes: 10 additions & 0 deletions compiler/rustc_ast/src/token.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ pub use TokenKind::*;

use crate::ast;
use crate::ptr::P;
use crate::util::case::Case;

use rustc_data_structures::stable_hasher::{HashStable, StableHasher};
use rustc_data_structures::sync::Lrc;
Expand Down Expand Up @@ -618,6 +619,15 @@ impl Token {
self.is_non_raw_ident_where(|id| id.name == kw)
}

/// Returns `true` if the token is a given keyword, `kw` or if `case` is `Insensitive` and this token is an identifier equal to `kw` ignoring the case.
pub fn is_keyword_case(&self, kw: Symbol, case: Case) -> bool {
self.is_keyword(kw)
|| (case == Case::Insensitive
&& self.is_non_raw_ident_where(|id| {
id.name.as_str().to_lowercase() == kw.as_str().to_lowercase()
}))
}

pub fn is_path_segment_keyword(&self) -> bool {
self.is_non_raw_ident_where(Ident::is_path_segment_keyword)
}
Expand Down
6 changes: 6 additions & 0 deletions compiler/rustc_ast/src/util/case.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
/// Whatever to ignore case (`fn` vs `Fn` vs `FN`) or not. Used for recovering.
#[derive(Copy, Clone, Debug, Eq, PartialEq)]
pub enum Case {
Sensitive,
Insensitive,
}
3 changes: 2 additions & 1 deletion compiler/rustc_parse/src/parser/expr.rs
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ use core::mem;
use rustc_ast::ptr::P;
use rustc_ast::token::{self, Delimiter, Token, TokenKind};
use rustc_ast::tokenstream::Spacing;
use rustc_ast::util::case::Case;
use rustc_ast::util::classify;
use rustc_ast::util::literal::LitError;
use rustc_ast::util::parser::{prec_let_scrutinee_needs_par, AssocOp, Fixity};
Expand Down Expand Up @@ -2024,7 +2025,7 @@ impl<'a> Parser<'a> {
if self.eat_keyword(kw::Static) { Movability::Static } else { Movability::Movable };

let asyncness = if self.token.uninterpolated_span().rust_2018() {
self.parse_asyncness()
self.parse_asyncness(Case::Sensitive)
} else {
Async::No
};
Expand Down
91 changes: 60 additions & 31 deletions compiler/rustc_parse/src/parser/item.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ use rustc_ast::ast::*;
use rustc_ast::ptr::P;
use rustc_ast::token::{self, Delimiter, TokenKind};
use rustc_ast::tokenstream::{DelimSpan, TokenStream, TokenTree};
use rustc_ast::util::case::Case;
use rustc_ast::{self as ast, AttrVec, Attribute, DUMMY_NODE_ID};
use rustc_ast::{Async, Const, Defaultness, IsAuto, Mutability, Unsafe, UseTree, UseTreeKind};
use rustc_ast::{BindingAnnotation, Block, FnDecl, FnSig, Param, SelfKind};
Expand All @@ -34,7 +35,7 @@ impl<'a> Parser<'a> {

/// Parses a `mod <foo> { ... }` or `mod <foo>;` item.
fn parse_item_mod(&mut self, attrs: &mut AttrVec) -> PResult<'a, ItemInfo> {
let unsafety = self.parse_unsafety();
let unsafety = self.parse_unsafety(Case::Sensitive);
self.expect_keyword(kw::Mod)?;
let id = self.parse_ident()?;
let mod_kind = if self.eat(&token::Semi) {
Expand Down Expand Up @@ -143,8 +144,15 @@ impl<'a> Parser<'a> {
let lo = self.token.span;
let vis = self.parse_visibility(FollowedByType::No)?;
let mut def = self.parse_defaultness();
let kind =
self.parse_item_kind(&mut attrs, mac_allowed, lo, &vis, &mut def, fn_parse_mode)?;
let kind = self.parse_item_kind(
&mut attrs,
mac_allowed,
lo,
&vis,
&mut def,
fn_parse_mode,
Case::Sensitive,
)?;
if let Some((ident, kind)) = kind {
self.error_on_unconsumed_default(def, &kind);
let span = lo.to(self.prev_token.span);
Expand Down Expand Up @@ -205,16 +213,17 @@ impl<'a> Parser<'a> {
vis: &Visibility,
def: &mut Defaultness,
fn_parse_mode: FnParseMode,
case: Case,
) -> PResult<'a, Option<ItemInfo>> {
let def_final = def == &Defaultness::Final;
let mut def = || mem::replace(def, Defaultness::Final);
let mut def_ = || mem::replace(def, Defaultness::Final);

let info = if self.eat_keyword(kw::Use) {
let info = if self.eat_keyword_case(kw::Use, case) {
self.parse_use_item()?
} else if self.check_fn_front_matter(def_final) {
} else if self.check_fn_front_matter(def_final, case) {
// FUNCTION ITEM
let (ident, sig, generics, body) = self.parse_fn(attrs, fn_parse_mode, lo, vis)?;
(ident, ItemKind::Fn(Box::new(Fn { defaultness: def(), sig, generics, body })))
(ident, ItemKind::Fn(Box::new(Fn { defaultness: def_(), sig, generics, body })))
} else if self.eat_keyword(kw::Extern) {
if self.eat_keyword(kw::Crate) {
// EXTERN CRATE
Expand All @@ -225,7 +234,7 @@ impl<'a> Parser<'a> {
}
} else if self.is_unsafe_foreign_mod() {
// EXTERN BLOCK
let unsafety = self.parse_unsafety();
let unsafety = self.parse_unsafety(Case::Sensitive);
self.expect_keyword(kw::Extern)?;
self.parse_item_foreign_mod(attrs, unsafety)?
} else if self.is_static_global() {
Expand All @@ -234,15 +243,15 @@ impl<'a> Parser<'a> {
let m = self.parse_mutability();
let (ident, ty, expr) = self.parse_item_global(Some(m))?;
(ident, ItemKind::Static(ty, m, expr))
} else if let Const::Yes(const_span) = self.parse_constness() {
} else if let Const::Yes(const_span) = self.parse_constness(Case::Sensitive) {
// CONST ITEM
if self.token.is_keyword(kw::Impl) {
// recover from `const impl`, suggest `impl const`
self.recover_const_impl(const_span, attrs, def())?
self.recover_const_impl(const_span, attrs, def_())?
} else {
self.recover_const_mut(const_span);
let (ident, ty, expr) = self.parse_item_global(None)?;
(ident, ItemKind::Const(def(), ty, expr))
(ident, ItemKind::Const(def_(), ty, expr))
}
} else if self.check_keyword(kw::Trait) || self.check_auto_or_unsafe_trait_item() {
// TRAIT ITEM
Expand All @@ -251,15 +260,15 @@ impl<'a> Parser<'a> {
|| self.check_keyword(kw::Unsafe) && self.is_keyword_ahead(1, &[kw::Impl])
{
// IMPL ITEM
self.parse_item_impl(attrs, def())?
self.parse_item_impl(attrs, def_())?
} else if self.check_keyword(kw::Mod)
|| self.check_keyword(kw::Unsafe) && self.is_keyword_ahead(1, &[kw::Mod])
{
// MODULE ITEM
self.parse_item_mod(attrs)?
} else if self.eat_keyword(kw::Type) {
// TYPE ITEM
self.parse_type_alias(def())?
self.parse_type_alias(def_())?
} else if self.eat_keyword(kw::Enum) {
// ENUM ITEM
self.parse_item_enum()?
Expand All @@ -286,6 +295,19 @@ impl<'a> Parser<'a> {
} else if self.isnt_macro_invocation() && vis.kind.is_pub() {
self.recover_missing_kw_before_item()?;
return Ok(None);
} else if self.isnt_macro_invocation() && case == Case::Sensitive {
_ = def_;

// Recover wrong cased keywords
return self.parse_item_kind(
attrs,
macros_allowed,
lo,
vis,
def,
fn_parse_mode,
Case::Insensitive,
);
} else if macros_allowed && self.check_path() {
// MACRO INVOCATION ITEM
(Ident::empty(), ItemKind::MacCall(P(self.parse_item_macro(vis)?)))
Expand Down Expand Up @@ -538,7 +560,7 @@ impl<'a> Parser<'a> {
attrs: &mut AttrVec,
defaultness: Defaultness,
) -> PResult<'a, ItemInfo> {
let unsafety = self.parse_unsafety();
let unsafety = self.parse_unsafety(Case::Sensitive);
self.expect_keyword(kw::Impl)?;

// First, parse generic parameters if necessary.
Expand All @@ -552,7 +574,7 @@ impl<'a> Parser<'a> {
generics
};

let constness = self.parse_constness();
let constness = self.parse_constness(Case::Sensitive);
if let Const::Yes(span) = constness {
self.sess.gated_spans.gate(sym::const_trait_impl, span);
}
Expand Down Expand Up @@ -796,7 +818,7 @@ impl<'a> Parser<'a> {

/// Parses `unsafe? auto? trait Foo { ... }` or `trait Foo = Bar;`.
fn parse_item_trait(&mut self, attrs: &mut AttrVec, lo: Span) -> PResult<'a, ItemInfo> {
let unsafety = self.parse_unsafety();
let unsafety = self.parse_unsafety(Case::Sensitive);
// Parse optional `auto` prefix.
let is_auto = if self.eat_keyword(kw::Auto) { IsAuto::Yes } else { IsAuto::No };

Expand Down Expand Up @@ -1745,7 +1767,7 @@ impl<'a> Parser<'a> {
let (ident, is_raw) = self.ident_or_err()?;
if !is_raw && ident.is_reserved() {
let snapshot = self.create_snapshot_for_diagnostic();
let err = if self.check_fn_front_matter(false) {
let err = if self.check_fn_front_matter(false, Case::Sensitive) {
let inherited_vis = Visibility {
span: rustc_span::DUMMY_SP,
kind: VisibilityKind::Inherited,
Expand Down Expand Up @@ -2134,7 +2156,7 @@ impl<'a> Parser<'a> {
///
/// `check_pub` adds additional `pub` to the checks in case users place it
/// wrongly, can be used to ensure `pub` never comes after `default`.
pub(super) fn check_fn_front_matter(&mut self, check_pub: bool) -> bool {
pub(super) fn check_fn_front_matter(&mut self, check_pub: bool, case: Case) -> bool {
// We use an over-approximation here.
// `const const`, `fn const` won't parse, but we're not stepping over other syntax either.
// `pub` is added in case users got confused with the ordering like `async pub fn`,
Expand All @@ -2144,23 +2166,30 @@ impl<'a> Parser<'a> {
} else {
&[kw::Const, kw::Async, kw::Unsafe, kw::Extern]
};
self.check_keyword(kw::Fn) // Definitely an `fn`.
self.check_keyword_case(kw::Fn, case) // Definitely an `fn`.
// `$qual fn` or `$qual $qual`:
|| quals.iter().any(|&kw| self.check_keyword(kw))
|| quals.iter().any(|&kw| self.check_keyword_case(kw, case))
&& self.look_ahead(1, |t| {
// `$qual fn`, e.g. `const fn` or `async fn`.
t.is_keyword(kw::Fn)
t.is_keyword_case(kw::Fn, case)
// Two qualifiers `$qual $qual` is enough, e.g. `async unsafe`.
|| t.is_non_raw_ident_where(|i| quals.contains(&i.name)
// Rule out 2015 `const async: T = val`.
&& i.is_reserved()
|| (
(
t.is_non_raw_ident_where(|i|
quals.contains(&i.name)
// Rule out 2015 `const async: T = val`.
&& i.is_reserved()
)
|| case == Case::Insensitive
&& t.is_non_raw_ident_where(|i| quals.iter().any(|qual| qual.as_str() == i.name.as_str().to_lowercase()))
)
// Rule out unsafe extern block.
&& !self.is_unsafe_foreign_mod())
})
// `extern ABI fn`
|| self.check_keyword(kw::Extern)
|| self.check_keyword_case(kw::Extern, case)
&& self.look_ahead(1, |t| t.can_begin_literal_maybe_minus())
&& self.look_ahead(2, |t| t.is_keyword(kw::Fn))
&& self.look_ahead(2, |t| t.is_keyword_case(kw::Fn, case))
}

/// Parses all the "front matter" (or "qualifiers") for a `fn` declaration,
Expand All @@ -2176,22 +2205,22 @@ impl<'a> Parser<'a> {
/// `Visibility::Inherited` when no visibility is known.
pub(super) fn parse_fn_front_matter(&mut self, orig_vis: &Visibility) -> PResult<'a, FnHeader> {
let sp_start = self.token.span;
let constness = self.parse_constness();
let constness = self.parse_constness(Case::Insensitive);

let async_start_sp = self.token.span;
let asyncness = self.parse_asyncness();
let asyncness = self.parse_asyncness(Case::Insensitive);

let unsafe_start_sp = self.token.span;
let unsafety = self.parse_unsafety();
let unsafety = self.parse_unsafety(Case::Insensitive);

let ext_start_sp = self.token.span;
let ext = self.parse_extern();
let ext = self.parse_extern(Case::Insensitive);

if let Async::Yes { span, .. } = asyncness {
self.ban_async_in_2015(span);
}

if !self.eat_keyword(kw::Fn) {
if !self.eat_keyword_case(kw::Fn, Case::Insensitive) {
// It is possible for `expect_one_of` to recover given the contents of
// `self.expected_tokens`, therefore, do not use `self.unexpected()` which doesn't
// account for this.
Expand Down
58 changes: 50 additions & 8 deletions compiler/rustc_parse/src/parser/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ use rustc_ast::token::{self, Delimiter, Nonterminal, Token, TokenKind};
use rustc_ast::tokenstream::AttributesData;
use rustc_ast::tokenstream::{self, DelimSpan, Spacing};
use rustc_ast::tokenstream::{TokenStream, TokenTree};
use rustc_ast::util::case::Case;
use rustc_ast::AttrId;
use rustc_ast::DUMMY_NODE_ID;
use rustc_ast::{self as ast, AnonConst, AttrStyle, AttrVec, Const, Extern};
Expand Down Expand Up @@ -604,6 +605,20 @@ impl<'a> Parser<'a> {
self.token.is_keyword(kw)
}

fn check_keyword_case(&mut self, kw: Symbol, case: Case) -> bool {
if self.check_keyword(kw) {
return true;
}

if case == Case::Insensitive
&& let Some((ident, /* is_raw */ false)) = self.token.ident()
&& ident.as_str().to_lowercase() == kw.as_str().to_lowercase() {
true
} else {
false
}
Comment on lines +613 to +619
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't see any obvious reason for the slow down, but can you try putting up PRs that revert some of these changes independently? Let's try to find if there's a single method that's introducing most of the perf regression (and it very well could be that it's just worse cache locality due to change in method inlining or arguments or something "silly" like that).

}

/// If the next token is the given keyword, eats it and returns `true`.
/// Otherwise, returns `false`. An expectation is also added for diagnostics purposes.
// Public for rustfmt usage.
Expand All @@ -616,6 +631,33 @@ impl<'a> Parser<'a> {
}
}

/// Eats a keyword, optionally ignoring the case.
/// If the case differs (and is ignored) an error is issued.
/// This is useful for recovery.
fn eat_keyword_case(&mut self, kw: Symbol, case: Case) -> bool {
if self.eat_keyword(kw) {
return true;
}

if case == Case::Insensitive
&& let Some((ident, /* is_raw */ false)) = self.token.ident()
&& ident.as_str().to_lowercase() == kw.as_str().to_lowercase() {
Comment on lines +643 to +644
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
&& let Some((ident, /* is_raw */ false)) = self.token.ident()
&& ident.as_str().to_lowercase() == kw.as_str().to_lowercase() {
&& let Some((ident, /* is_raw */ false)) = self.token.ident()
&& ident.as_str().to_lowercase() == kw.as_str().to_lowercase()
{

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Now I'm mildly amused at the thought of introducing a CamelCased keyword 😄

self
.struct_span_err(ident.span, format!("keyword `{kw}` is written in a wrong case"))
.span_suggestion(
ident.span,
"write it in the correct case",
kw,
Applicability::MachineApplicable
).emit();

self.bump();
return true;
}

false
}

fn eat_keyword_noexpect(&mut self, kw: Symbol) -> bool {
if self.token.is_keyword(kw) {
self.bump();
Expand Down Expand Up @@ -1095,8 +1137,8 @@ impl<'a> Parser<'a> {
}

/// Parses asyncness: `async` or nothing.
fn parse_asyncness(&mut self) -> Async {
if self.eat_keyword(kw::Async) {
fn parse_asyncness(&mut self, case: Case) -> Async {
if self.eat_keyword_case(kw::Async, case) {
let span = self.prev_token.uninterpolated_span();
Async::Yes { span, closure_id: DUMMY_NODE_ID, return_impl_trait_id: DUMMY_NODE_ID }
} else {
Expand All @@ -1105,19 +1147,19 @@ impl<'a> Parser<'a> {
}

/// Parses unsafety: `unsafe` or nothing.
fn parse_unsafety(&mut self) -> Unsafe {
if self.eat_keyword(kw::Unsafe) {
fn parse_unsafety(&mut self, case: Case) -> Unsafe {
if self.eat_keyword_case(kw::Unsafe, case) {
Unsafe::Yes(self.prev_token.uninterpolated_span())
} else {
Unsafe::No
}
}

/// Parses constness: `const` or nothing.
fn parse_constness(&mut self) -> Const {
fn parse_constness(&mut self, case: Case) -> Const {
// Avoid const blocks to be parsed as const items
if self.look_ahead(1, |t| t != &token::OpenDelim(Delimiter::Brace))
&& self.eat_keyword(kw::Const)
&& self.eat_keyword_case(kw::Const, case)
{
Const::Yes(self.prev_token.uninterpolated_span())
} else {
Expand Down Expand Up @@ -1372,8 +1414,8 @@ impl<'a> Parser<'a> {
}

/// Parses `extern string_literal?`.
fn parse_extern(&mut self) -> Extern {
if self.eat_keyword(kw::Extern) {
fn parse_extern(&mut self, case: Case) -> Extern {
if self.eat_keyword_case(kw::Extern, case) {
let mut extern_span = self.prev_token.span;
let abi = self.parse_abi();
if let Some(abi) = abi {
Expand Down
Loading