diff --git a/Cargo.lock b/Cargo.lock index 6fe210ab93a..7a631368870 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1981,6 +1981,7 @@ dependencies = [ "noirc_errors", "rustc-hash", "serde", + "small-ord-set", "smol_str", "strum", "strum_macros", @@ -2898,6 +2899,15 @@ dependencies = [ "autocfg", ] +[[package]] +name = "small-ord-set" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cf7035a2b2268a5be8c1395738565b06beda836097e12021cdefc06b127a0e7e" +dependencies = [ + "smallvec", +] + [[package]] name = "smallvec" version = "1.10.0" diff --git a/crates/noirc_frontend/Cargo.toml b/crates/noirc_frontend/Cargo.toml index b5551d17f51..f3fc1c83758 100644 --- a/crates/noirc_frontend/Cargo.toml +++ b/crates/noirc_frontend/Cargo.toml @@ -18,6 +18,7 @@ thiserror.workspace = true smol_str.workspace = true serde.workspace = true rustc-hash = "1.1.0" +small-ord-set = "0.1.3" [dev-dependencies] strum = "0.24" diff --git a/crates/noirc_frontend/src/ast/mod.rs b/crates/noirc_frontend/src/ast/mod.rs index 37eb944e0c6..24004e34ffa 100644 --- a/crates/noirc_frontend/src/ast/mod.rs +++ b/crates/noirc_frontend/src/ast/mod.rs @@ -152,6 +152,9 @@ pub enum Signedness { } impl UnresolvedTypeExpression { + // This large error size is justified because it improves parsing speeds by around 40% in + // release mode. See `ParserError` definition for further explanation. + #[allow(clippy::result_large_err)] pub fn from_expr( expr: Expression, span: Span, diff --git a/crates/noirc_frontend/src/hir/resolution/errors.rs b/crates/noirc_frontend/src/hir/resolution/errors.rs index c57e4c890d2..87257cbb842 100644 --- a/crates/noirc_frontend/src/hir/resolution/errors.rs +++ b/crates/noirc_frontend/src/hir/resolution/errors.rs @@ -57,7 +57,7 @@ pub enum ResolverError { #[error("Incorrect amount of arguments to generic type constructor")] IncorrectGenericCount { span: Span, struct_type: String, actual: usize, expected: usize }, #[error("{0}")] - ParserError(ParserError), + ParserError(Box), #[error("Function is not defined in a contract yet sets its contract visibility")] ContractFunctionTypeInNormalFunction { span: Span }, } @@ -252,7 +252,7 @@ impl From for Diagnostic { span, ) } - ResolverError::ParserError(error) => error.into(), + ResolverError::ParserError(error) => (*error).into(), ResolverError::ContractFunctionTypeInNormalFunction { span } => Diagnostic::simple_error( "Only functions defined within contracts can set their contract function type".into(), "Non-contract functions cannot be 'open'".into(), diff --git a/crates/noirc_frontend/src/hir/resolution/resolver.rs b/crates/noirc_frontend/src/hir/resolution/resolver.rs index f03bcefeb2d..d80bca9df17 100644 --- a/crates/noirc_frontend/src/hir/resolution/resolver.rs +++ b/crates/noirc_frontend/src/hir/resolution/resolver.rs @@ -859,7 +859,7 @@ impl<'a> Resolver<'a> { let span = length.span; let length = UnresolvedTypeExpression::from_expr(*length, span).unwrap_or_else( |error| { - self.errors.push(ResolverError::ParserError(error)); + self.errors.push(ResolverError::ParserError(Box::new(error))); UnresolvedTypeExpression::Constant(0, span) }, ); diff --git a/crates/noirc_frontend/src/lexer/token.rs b/crates/noirc_frontend/src/lexer/token.rs index bfcd0f4be51..fe0e3bf1f90 100644 --- a/crates/noirc_frontend/src/lexer/token.rs +++ b/crates/noirc_frontend/src/lexer/token.rs @@ -189,7 +189,7 @@ impl fmt::Display for Token { } } -#[derive(PartialEq, Eq, Hash, Debug, Clone)] +#[derive(PartialEq, Eq, Hash, Debug, Clone, Ord, PartialOrd)] /// The different kinds of tokens that are possible in the target language pub enum TokenKind { Token(Token), diff --git a/crates/noirc_frontend/src/parser/errors.rs b/crates/noirc_frontend/src/parser/errors.rs index c339835fbc3..d4a294482a8 100644 --- a/crates/noirc_frontend/src/parser/errors.rs +++ b/crates/noirc_frontend/src/parser/errors.rs @@ -1,13 +1,14 @@ -use std::collections::BTreeSet; - use crate::lexer::token::Token; use crate::Expression; +use small_ord_set::SmallOrdSet; use thiserror::Error; use iter_extended::vecmap; use noirc_errors::CustomDiagnostic as Diagnostic; use noirc_errors::Span; +use super::labels::ParsingRuleLabel; + #[derive(Debug, Clone, PartialEq, Eq, Error)] pub enum ParserErrorReason { #[error("Arrays must have at least one element")] @@ -22,10 +23,22 @@ pub enum ParserErrorReason { InvalidArrayLengthExpression(Expression), } +/// Represents a parsing error, or a parsing error in the making. +/// +/// `ParserError` is used extensively by the parser, as it not only used to report badly formed +/// token streams, but also as a general intermediate that accumulates information as various +/// parsing rules are tried. This struct is constructed and destructed with a very high frequency +/// and as such, the time taken to do so significantly impacts parsing performance. For this +/// reason we use `SmallOrdSet` to avoid heap allocations for as long as possible - this greatly +/// inflates the size of the error, but this is justified by a resulting increase in parsing +/// speeds of approximately 40% in release mode. +/// +/// Both `expected_tokens` and `expected_labels` use `SmallOrdSet` sized 1. In the of labels this +/// is optimal. In the of tokens we stop here due to fast diminishing returns. #[derive(Debug, Clone, PartialEq, Eq)] pub struct ParserError { - expected_tokens: BTreeSet, - expected_labels: BTreeSet, + expected_tokens: SmallOrdSet<[Token; 1]>, + expected_labels: SmallOrdSet<[ParsingRuleLabel; 1]>, found: Token, reason: Option, span: Span, @@ -34,21 +47,15 @@ pub struct ParserError { impl ParserError { pub fn empty(found: Token, span: Span) -> ParserError { ParserError { - expected_tokens: BTreeSet::new(), - expected_labels: BTreeSet::new(), + expected_tokens: SmallOrdSet::new(), + expected_labels: SmallOrdSet::new(), found, reason: None, span, } } - pub fn expected(token: Token, found: Token, span: Span) -> ParserError { - let mut error = ParserError::empty(found, span); - error.expected_tokens.insert(token); - error - } - - pub fn expected_label(label: String, found: Token, span: Span) -> ParserError { + pub fn expected_label(label: ParsingRuleLabel, found: Token, span: Span) -> ParserError { let mut error = ParserError::empty(found, span); error.expected_labels.insert(label); error @@ -64,7 +71,7 @@ impl ParserError { impl std::fmt::Display for ParserError { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { let mut expected = vecmap(&self.expected_tokens, ToString::to_string); - expected.append(&mut vecmap(&self.expected_labels, Clone::clone)); + expected.append(&mut vecmap(&self.expected_labels, |label| format!("{label}"))); if expected.is_empty() { write!(f, "Unexpected {} in input", self.found) @@ -112,7 +119,7 @@ impl From for Diagnostic { impl chumsky::Error for ParserError { type Span = Span; - type Label = String; + type Label = ParsingRuleLabel; fn expected_input_found(span: Self::Span, expected: Iter, found: Option) -> Self where @@ -120,7 +127,7 @@ impl chumsky::Error for ParserError { { ParserError { expected_tokens: expected.into_iter().map(|opt| opt.unwrap_or(Token::EOF)).collect(), - expected_labels: BTreeSet::new(), + expected_labels: SmallOrdSet::new(), found: found.unwrap_or(Token::EOF), reason: None, span, diff --git a/crates/noirc_frontend/src/parser/labels.rs b/crates/noirc_frontend/src/parser/labels.rs new file mode 100644 index 00000000000..b43c10fb9e7 --- /dev/null +++ b/crates/noirc_frontend/src/parser/labels.rs @@ -0,0 +1,42 @@ +use std::fmt; + +use crate::token::TokenKind; + +/// Used to annotate parsing rules with extra context that can be presented to the user later in +/// the case of an error. +#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)] +pub enum ParsingRuleLabel { + Atom, + BinaryOperator, + Cast, + Expression, + FieldAccess, + Global, + IntegerType, + Parameter, + Pattern, + Statement, + Term, + TypeExpression, + TokenKind(TokenKind), +} + +impl fmt::Display for ParsingRuleLabel { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + ParsingRuleLabel::Atom => write!(f, "atom"), + ParsingRuleLabel::BinaryOperator => write!(f, "binary operator"), + ParsingRuleLabel::Cast => write!(f, "cast"), + ParsingRuleLabel::Expression => write!(f, "expression"), + ParsingRuleLabel::FieldAccess => write!(f, "field access"), + ParsingRuleLabel::Global => write!(f, "global"), + ParsingRuleLabel::IntegerType => write!(f, "integer type"), + ParsingRuleLabel::Parameter => write!(f, "parameter"), + ParsingRuleLabel::Pattern => write!(f, "pattern"), + ParsingRuleLabel::Statement => write!(f, "statement"), + ParsingRuleLabel::Term => write!(f, "term"), + ParsingRuleLabel::TypeExpression => write!(f, "type expression"), + ParsingRuleLabel::TokenKind(token_kind) => write!(f, "{:?}", token_kind), + } + } +} diff --git a/crates/noirc_frontend/src/parser/mod.rs b/crates/noirc_frontend/src/parser/mod.rs index 98b7fffbf14..a8b7f43fa5c 100644 --- a/crates/noirc_frontend/src/parser/mod.rs +++ b/crates/noirc_frontend/src/parser/mod.rs @@ -7,6 +7,7 @@ //! This file is mostly helper functions and types for the parser. For the parser itself, //! see parser.rs. The definition of the abstract syntax tree can be found in the `ast` folder. mod errors; +mod labels; #[allow(clippy::module_inception)] mod parser; diff --git a/crates/noirc_frontend/src/parser/parser.rs b/crates/noirc_frontend/src/parser/parser.rs index 3a8c8f49303..98b45247567 100644 --- a/crates/noirc_frontend/src/parser/parser.rs +++ b/crates/noirc_frontend/src/parser/parser.rs @@ -24,9 +24,10 @@ //! be limited to cases like the above `fn` example where it is clear we shouldn't back out of the //! current parser to try alternative parsers in a `choice` expression. use super::{ - foldl_with_span, parameter_name_recovery, parameter_recovery, parenthesized, then_commit, - then_commit_ignore, top_level_statement_recovery, ExprParser, ForRange, NoirParser, - ParsedModule, ParserError, ParserErrorReason, Precedence, SubModule, TopLevelStatement, + foldl_with_span, labels::ParsingRuleLabel, parameter_name_recovery, parameter_recovery, + parenthesized, then_commit, then_commit_ignore, top_level_statement_recovery, ExprParser, + ForRange, NoirParser, ParsedModule, ParserError, ParserErrorReason, Precedence, SubModule, + TopLevelStatement, }; use crate::ast::{Expression, ExpressionKind, LetStatement, Statement, UnresolvedType}; use crate::lexer::Lexer; @@ -113,7 +114,7 @@ fn top_level_statement( /// global_declaration: 'global' ident global_type_annotation '=' literal fn global_declaration() -> impl NoirParser { let p = ignore_then_commit( - keyword(Keyword::Global).labelled("global"), + keyword(Keyword::Global).labelled(ParsingRuleLabel::Global), ident().map(Pattern::Identifier), ); let p = then_commit(p, global_type_annotation()); @@ -273,7 +274,10 @@ fn lambda_parameters() -> impl NoirParser> { .recover_via(parameter_name_recovery()) .then(typ.or_not().map(|typ| typ.unwrap_or(UnresolvedType::Unspecified))); - parameter.separated_by(just(Token::Comma)).allow_trailing().labelled("parameter") + parameter + .separated_by(just(Token::Comma)) + .allow_trailing() + .labelled(ParsingRuleLabel::Parameter) } fn function_parameters<'a>( @@ -292,7 +296,10 @@ fn function_parameters<'a>( let parameter = full_parameter.or(self_parameter); - parameter.separated_by(just(Token::Comma)).allow_trailing().labelled("parameter") + parameter + .separated_by(just(Token::Comma)) + .allow_trailing() + .labelled(ParsingRuleLabel::Parameter) } /// This parser always parses no input and fails @@ -308,7 +315,7 @@ fn self_parameter() -> impl NoirParser<(Pattern, UnresolvedType, AbiVisibility)> let self_type = UnresolvedType::Named(path, vec![]); Ok((Pattern::Identifier(ident), self_type, AbiVisibility::Private)) } - _ => Err(ParserError::expected_label("parameter".to_owned(), found, span)), + _ => Err(ParserError::expected_label(ParsingRuleLabel::Parameter, found, span)), }) } @@ -406,7 +413,11 @@ fn token_kind(token_kind: TokenKind) -> impl NoirParser { if found.kind() == token_kind { Ok(found) } else { - Err(ParserError::expected_label(token_kind.to_string(), found, span)) + Err(ParserError::expected_label( + ParsingRuleLabel::TokenKind(token_kind.clone()), + found, + span, + )) } }) } @@ -446,12 +457,15 @@ fn constrain<'a, P>(expr_parser: P) -> impl NoirParser + 'a where P: ExprParser + 'a, { - ignore_then_commit(keyword(Keyword::Constrain).labelled("statement"), expr_parser) - .map(|expr| Statement::Constrain(ConstrainStatement(expr))) - .validate(|expr, span, emit| { - emit(ParserError::with_reason(ParserErrorReason::ConstrainDeprecated, span)); - expr - }) + ignore_then_commit( + keyword(Keyword::Constrain).labelled(ParsingRuleLabel::Statement), + expr_parser, + ) + .map(|expr| Statement::Constrain(ConstrainStatement(expr))) + .validate(|expr, span, emit| { + emit(ParserError::with_reason(ParserErrorReason::ConstrainDeprecated, span)); + expr + }) } fn assertion<'a, P>(expr_parser: P) -> impl NoirParser + 'a @@ -459,7 +473,7 @@ where P: ExprParser + 'a, { ignore_then_commit(keyword(Keyword::Assert), parenthesized(expr_parser)) - .labelled("statement") + .labelled(ParsingRuleLabel::Statement) .map(|expr| Statement::Constrain(ConstrainStatement(expr))) } @@ -467,7 +481,8 @@ fn declaration<'a, P>(expr_parser: P) -> impl NoirParser + 'a where P: ExprParser + 'a, { - let p = ignore_then_commit(keyword(Keyword::Let).labelled("statement"), pattern()); + let p = + ignore_then_commit(keyword(Keyword::Let).labelled(ParsingRuleLabel::Statement), pattern()); let p = p.then(optional_type_annotation()); let p = then_commit_ignore(p, just(Token::Assign)); let p = then_commit(p, expr_parser); @@ -501,14 +516,15 @@ fn pattern() -> impl NoirParser { choice((mut_pattern, tuple_pattern, struct_pattern, ident_pattern)) }) - .labelled("pattern") + .labelled(ParsingRuleLabel::Pattern) } fn assignment<'a, P>(expr_parser: P) -> impl NoirParser + 'a where P: ExprParser + 'a, { - let fallible = lvalue(expr_parser.clone()).then(assign_operator()).labelled("statement"); + let fallible = + lvalue(expr_parser.clone()).then(assign_operator()).labelled(ParsingRuleLabel::Statement); then_commit(fallible, expr_parser).map_with_span( |((identifier, operator), expression), span| { @@ -623,7 +639,7 @@ fn int_type() -> impl NoirParser { .then(filter_map(|span, token: Token| match token { Token::IntType(int_type) => Ok(int_type), unexpected => { - Err(ParserError::expected_label("integer type".to_string(), unexpected, span)) + Err(ParserError::expected_label(ParsingRuleLabel::IntegerType, unexpected, span)) } })) .map(UnresolvedType::from_int_token) @@ -669,7 +685,7 @@ fn array_type(type_parser: impl NoirParser) -> impl NoirParser impl NoirParser { recursive(|expr| expression_with_precedence(Precedence::lowest_type_precedence(), expr, true)) - .labelled("type expression") + .labelled(ParsingRuleLabel::TypeExpression) .try_map(UnresolvedTypeExpression::from_expr) } @@ -695,7 +711,7 @@ where fn expression() -> impl ExprParser { recursive(|expr| expression_with_precedence(Precedence::Lowest, expr, false)) - .labelled("expression") + .labelled(ParsingRuleLabel::Expression) } // An expression is a single term followed by 0 or more (OP subexpression)* @@ -712,9 +728,9 @@ where { if precedence == Precedence::Highest { if is_type_expression { - type_expression_term(expr_parser).boxed().labelled("term") + type_expression_term(expr_parser).boxed().labelled(ParsingRuleLabel::Term) } else { - term(expr_parser).boxed().labelled("term") + term(expr_parser).boxed().labelled(ParsingRuleLabel::Term) } } else { let next_precedence = @@ -728,7 +744,7 @@ where .then(then_commit(operator_with_precedence(precedence), next_expr).repeated()) .foldl(create_infix_expression) .boxed() - .labelled("expression") + .labelled(ParsingRuleLabel::Expression) } } @@ -753,7 +769,7 @@ fn operator_with_precedence(precedence: Precedence) -> impl NoirParser(expr_parser: P) -> impl NoirParser