Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix parsing ambiguous call options #862

Merged
merged 19 commits into from
Mar 8, 2024
Merged
Show file tree
Hide file tree
Changes from 16 commits
Commits
Show all changes
19 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions .changeset/shy-pets-attack.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
"@nomicfoundation/slang": patch
---

allow call options as a postfix expression
13 changes: 7 additions & 6 deletions crates/codegen/grammar/src/constructor.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,11 +10,11 @@ use codegen_language_definition::model::{self, FieldsErrorRecovery, Identifier,
use indexmap::IndexMap;

use crate::{
Grammar, GrammarElement, KeywordScannerDefinition, KeywordScannerDefinitionNode,
KeywordScannerDefinitionVersionedNode, Labeled, ParserDefinition, ParserDefinitionNode,
PrecedenceOperatorModel, PrecedenceParserDefinition, PrecedenceParserDefinitionNode,
ScannerDefinition, ScannerDefinitionNode, TriviaParserDefinition, VersionQuality,
VersionQualityRange,
DelimitedRecoveryTokenThreshold, Grammar, GrammarElement, KeywordScannerDefinition,
KeywordScannerDefinitionNode, KeywordScannerDefinitionVersionedNode, Labeled, ParserDefinition,
ParserDefinitionNode, PrecedenceOperatorModel, PrecedenceParserDefinition,
PrecedenceParserDefinitionNode, ScannerDefinition, ScannerDefinitionNode,
TriviaParserDefinition, VersionQuality, VersionQualityRange,
};

/// Materializes the DSL v2 model ([`model::Language`]) into [`Grammar`].
Expand Down Expand Up @@ -610,7 +610,8 @@ fn resolve_sequence_like(
let open = delims.next().unwrap();
let close = delims.next().unwrap();

ParserDefinitionNode::DelimitedBy(open, Box::new(delimited_body), close)
let threshold = DelimitedRecoveryTokenThreshold::from(delimiters);
ParserDefinitionNode::DelimitedBy(open, Box::new(delimited_body), close, threshold)
};
// Replace with a new delimited node
fields.insert(
Expand Down
30 changes: 28 additions & 2 deletions crates/codegen/grammar/src/parser_definition.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
use std::fmt::Debug;
use std::rc::Rc;

use codegen_language_definition::model;

use crate::visitor::{GrammarVisitor, Visitable};
use crate::{
KeywordScannerDefinitionRef, PrecedenceParserDefinitionRef, ScannerDefinitionRef,
Expand Down Expand Up @@ -53,6 +55,25 @@ impl Visitable for TriviaParserDefinitionRef {
}
}

/// How many tokens have to be matched to trigger the error recovery.
/// For ambiguous syntaxes this needs to be set to at least N, where N
/// is the token lookahead required to disambiguate the syntax.
///
/// By default, we assume no lookahead (0) is required to recover from
/// unrecognized body between delimiters, so it's always triggered.
#[derive(Clone, Debug, Default)]
pub struct DelimitedRecoveryTokenThreshold(pub u8);

impl From<model::FieldDelimiters> for DelimitedRecoveryTokenThreshold {
    /// Reads the threshold configured in the DSL model, falling back to
    /// the default (0, i.e. always recover) when none was specified.
    fn from(delimiters: model::FieldDelimiters) -> Self {
        Self(
            delimiters
                .tokens_matched_acceptance_threshold
                .unwrap_or(Self::default().0),
        )
    }
}

#[derive(Clone, Debug)]
pub enum ParserDefinitionNode {
Versioned(Box<Self>, Vec<VersionQualityRange>),
Expand All @@ -66,7 +87,12 @@ pub enum ParserDefinitionNode {
TriviaParserDefinition(TriviaParserDefinitionRef),
ParserDefinition(ParserDefinitionRef),
PrecedenceParserDefinition(PrecedenceParserDefinitionRef),
DelimitedBy(Labeled<Box<Self>>, Box<Self>, Labeled<Box<Self>>),
DelimitedBy(
Labeled<Box<Self>>,
Box<Self>,
Labeled<Box<Self>>,
DelimitedRecoveryTokenThreshold,
),
SeparatedBy(Labeled<Box<Self>>, Labeled<Box<Self>>),
TerminatedBy(Box<Self>, Labeled<Box<Self>>),
}
Expand Down Expand Up @@ -115,7 +141,7 @@ impl Visitable for ParserDefinitionNode {
}
}

Self::DelimitedBy(open, body, close) => {
Self::DelimitedBy(open, body, close, ..) => {
open.accept_visitor(visitor);
body.accept_visitor(visitor);
close.accept_visitor(visitor);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -140,6 +140,15 @@ impl ParseInputTokens for usize {
}
}

impl ParseInputTokens for u8 {
    /// Parses a single integer literal token and interprets it in base 10 as `u8`.
    fn parse_value(input: ParseStream<'_>, _: &mut ErrorsCollection) -> Result<Self> {
        let literal: syn::LitInt = ParseHelpers::syn(input)?;

        Ok(literal.base10_parse::<u8>()?)
    }
}

impl<T: ParseInputTokens> ParseInputTokens for Vec<T> {
fn parse_value(input: ParseStream<'_>, errors: &mut ErrorsCollection) -> Result<Self> {
Ok(ParseHelpers::sequence(input, errors))
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,16 @@ impl WriteOutputTokens for usize {
}
}

impl WriteOutputTokens for u8 {
    /// Emits the value as a suffixed literal (e.g. `2u8`) followed by `.into()`,
    /// so the generated code converts it into the expected target type.
    fn write_output_tokens(&self) -> TokenStream {
        let literal = Literal::u8_suffixed(*self);

        quote! { #literal.into() }
    }
}

impl<T: WriteOutputTokens> WriteOutputTokens for Vec<T> {
fn write_output_tokens(&self) -> TokenStream {
let items = self.iter().map(T::write_output_tokens);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,13 @@ pub struct FieldsErrorRecovery {
pub struct FieldDelimiters {
pub open: Identifier,
pub close: Identifier,
/// How many tokens have to be matched to trigger the error recovery.
/// For ambiguous syntaxes this needs to be set to at least N, where N
/// is the token lookahead required to disambiguate the syntax.
OmarTawfik marked this conversation as resolved.
Show resolved Hide resolved
///
/// By default, we assume no lookahead (0) is required to recover from
/// unrecognized body between delimiters, so it's always triggered.
pub tokens_matched_acceptance_threshold: Option<u8>,
OmarTawfik marked this conversation as resolved.
Show resolved Hide resolved
}

#[derive(Clone, Debug, Deserialize, Eq, PartialEq, Serialize)]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -159,7 +159,7 @@ fn get_spanned_type(input: Type) -> Type {
}

// External types should also be wrapped in 'Spanned<T>':
"bool" | "char" | "PathBuf" | "String" | "Version" => {
"bool" | "char" | "PathBuf" | "String" | "u8" | "Version" => {
parse_quote! {
crate::internals::Spanned<#input>
}
Expand Down
8 changes: 5 additions & 3 deletions crates/codegen/parser/generator/src/parser_definition.rs
Original file line number Diff line number Diff line change
Expand Up @@ -191,7 +191,7 @@ impl ParserDefinitionNodeExtensions for ParserDefinitionNode {
quote! { self.#function_name(input) }
}

Self::DelimitedBy(open, body, close) => {
Self::DelimitedBy(open, body, close, threshold) => {
let open_label = format_ident!("{}", open.label.to_pascal_case());
let close_label = format_ident!("{}", close.label.to_pascal_case());
let [open_delim, close_delim] = match (open.as_ref(), close.as_ref()) {
Expand All @@ -201,6 +201,7 @@ impl ParserDefinitionNodeExtensions for ParserDefinitionNode {
) => [open, close].map(|scanner| format_ident!("{}", scanner.name())),
_ => unreachable!("Only tokens are permitted as delimiters"),
};
let threshold = threshold.0;

let parser = body.to_parser_code(context_name, is_trivia);
let body_parser = body.applicable_version_quality_ranges().wrap_code(
Expand All @@ -209,7 +210,7 @@ impl ParserDefinitionNodeExtensions for ParserDefinitionNode {
.recover_until_with_nested_delims::<_, #lex_ctx>(input,
self,
TokenKind::#close_delim,
RecoverFromNoMatch::Yes,
TokenAcceptanceThreshold(#threshold),
)
)?;
},
Expand Down Expand Up @@ -275,7 +276,8 @@ impl ParserDefinitionNodeExtensions for ParserDefinitionNode {
.recover_until_with_nested_delims::<_, #lex_ctx>(input,
self,
TokenKind::#terminator,
RecoverFromNoMatch::No,
// Requires at least a partial match not to risk misparsing
TokenAcceptanceThreshold(1u8),
)
)?;
},
Expand Down
2 changes: 1 addition & 1 deletion crates/codegen/parser/generator/src/rust_generator.rs
Original file line number Diff line number Diff line change
Expand Up @@ -421,7 +421,7 @@ impl GrammarVisitor for RustGenerator {
}

// Collect delimiters for each context
ParserDefinitionNode::DelimitedBy(open, _, close) => {
ParserDefinitionNode::DelimitedBy(open, _, close, ..) => {
self.labels.insert(open.label.clone());
self.labels.insert(close.label.clone());

Expand Down
2 changes: 1 addition & 1 deletion crates/codegen/parser/runtime/src/parser_support/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ pub(crate) use parser_result::ParserResult;
#[allow(unused_imports)]
pub(crate) use precedence_helper::PrecedenceHelper;
#[allow(unused_imports)]
pub(crate) use recovery::RecoverFromNoMatch;
pub(crate) use recovery::TokenAcceptanceThreshold;
#[allow(unused_imports)]
pub(crate) use repetition_helper::{OneOrMoreHelper, ZeroOrMoreHelper};
#[allow(unused_imports)]
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
use std::ops::ControlFlow;

use crate::cst::{self, LabeledNode};
use crate::cst::{self, LabeledNode, Node};
use crate::kinds::{NodeLabel, RuleKind, TokenKind};
use crate::text_index::TextIndex;

Expand Down Expand Up @@ -201,6 +201,33 @@ impl IncompleteMatch {
expected_tokens,
}
}

/// Whether this prefix-matched at least `n` (non-skipped) significant tokens.
pub fn matches_at_least_n_tokens(&self, n: u8) -> bool {
    let mut matched = 0u8;

    // Walk every node reachable from the partially-matched children,
    // counting only significant tokens: SKIPPED (error-recovery) tokens
    // and trivia do not count towards the threshold.
    for node in self
        .nodes
        .iter()
        .flat_map(|node| node.cursor_with_offset(TextIndex::ZERO))
    {
        // Short-circuit rather than walking the whole tree once we have
        // already matched enough tokens.
        if matched >= n {
            return true;
        }

        if let Node::Token(token) = node {
            if token.kind != TokenKind::SKIPPED && !token.kind.is_trivia() {
                matched += 1;
            }
        }
    }

    matched >= n
}
}

#[derive(PartialEq, Eq, Clone, Debug)]
Expand Down
33 changes: 15 additions & 18 deletions crates/codegen/parser/runtime/src/parser_support/recovery.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,18 +7,11 @@ use crate::parser_support::parser_result::SkippedUntil;
use crate::parser_support::ParserResult;
use crate::text_index::{TextRange, TextRangeExtensions as _};

/// An explicit parameter for the [`ParserResult::recover_until_with_nested_delims`] method.
/// How many tokens have to be matched to trigger the error recovery.
/// For ambiguous syntaxes this needs to be set to at least N, where N
/// is the token lookahead required to disambiguate the syntax.
#[derive(Clone, Copy)]
pub(crate) enum RecoverFromNoMatch {
Yes,
No,
}

impl RecoverFromNoMatch {
pub fn as_bool(self) -> bool {
matches!(self, RecoverFromNoMatch::Yes)
}
}
pub(crate) struct TokenAcceptanceThreshold(pub(crate) u8);

fn opt_parse(
input: &mut ParserContext<'_>,
Expand Down Expand Up @@ -46,7 +39,7 @@ impl ParserResult {
input: &mut ParserContext<'_>,
lexer: &L,
expected: TokenKind,
recover_from_no_match: RecoverFromNoMatch,
acceptance_threshold: TokenAcceptanceThreshold,
) -> ParserResult {
enum ParseResultKind {
Match,
Expand All @@ -57,11 +50,15 @@ impl ParserResult {
let before_recovery = input.position();

let (mut nodes, mut expected_tokens, result_kind) = match self {
ParserResult::IncompleteMatch(result) => (
result.nodes,
result.expected_tokens,
ParseResultKind::Incomplete,
),
ParserResult::IncompleteMatch(result)
if result.matches_at_least_n_tokens(acceptance_threshold.0) =>
{
(
result.nodes,
result.expected_tokens,
ParseResultKind::Incomplete,
)
}
ParserResult::Match(result)
if lexer
.peek_token_with_trivia::<LexCtx>(input)
Expand All @@ -70,7 +67,7 @@ impl ParserResult {
{
(result.nodes, result.expected_tokens, ParseResultKind::Match)
}
ParserResult::NoMatch(result) if recover_from_no_match.as_bool() => {
ParserResult::NoMatch(result) if acceptance_threshold.0 == 0 => {
(vec![], result.expected_tokens, ParseResultKind::NoMatch)
}
// No need to recover, so just return as-is.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ use crate::napi_interface::parse_output::ParseOutput as NAPIParseOutput;
use crate::parse_output::ParseOutput;
use crate::parser_support::{
ChoiceHelper, OneOrMoreHelper, OptionalHelper, ParserContext, ParserFunction, ParserResult,
PrecedenceHelper, RecoverFromNoMatch, SeparatedHelper, SequenceHelper, ZeroOrMoreHelper,
PrecedenceHelper, SeparatedHelper, SequenceHelper, TokenAcceptanceThreshold, ZeroOrMoreHelper,
};

#[derive(Debug)]
Expand Down
54 changes: 30 additions & 24 deletions crates/solidity/inputs/language/src/definition.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3378,12 +3378,29 @@ codegen_language_macros::compile!(Language(
name = FunctionCallExpression,
operators = [PrecedenceOperator(
model = Postfix,
fields = (arguments = Required(ArgumentsDeclaration))
)]
),
PrecedenceExpression(
name = CallOptionsExpression,
operators = [PrecedenceOperator(
model = Postfix,
enabled = From("0.6.2"),
error_recovery = FieldsErrorRecovery(
delimiters = FieldDelimiters(
open = open_brace,
close = close_brace,
// NOTE: Despite `CallOptions` requiring at least one element,
// we can only recover if we found at least two tokens (`ident:`)
// in the body, as this may be otherwise ambiguous with
// `try <EXPR> { func() } catch {}`.
tokens_matched_acceptance_threshold = 2
)
),
fields = (
options = Optional(
reference = FunctionCallOptions,
enabled = From("0.6.2")
),
arguments = Required(ArgumentsDeclaration)
open_brace = Required(OpenBrace),
arguments = Required(CallOptions),
Xanewok marked this conversation as resolved.
Show resolved Hide resolved
close_brace = Required(CloseBrace)
)
)]
),
Expand Down Expand Up @@ -3456,20 +3473,6 @@ codegen_language_macros::compile!(Language(
Topic(
title = "Function Calls",
items = [
Enum(
name = FunctionCallOptions,
enabled = From("0.6.2"),
variants = [
EnumVariant(
reference = NamedArgumentGroups,
enabled = Range(from = "0.6.2", till = "0.8.0")
),
EnumVariant(
reference = NamedArgumentGroup,
enabled = From("0.8.0")
)
]
),
Enum(
name = ArgumentsDeclaration,
variants = [
Expand Down Expand Up @@ -3507,11 +3510,6 @@ codegen_language_macros::compile!(Language(
close_paren = Required(CloseParen)
)
),
Repeated(
name = NamedArgumentGroups,
reference = NamedArgumentGroup,
enabled = Range(from = "0.6.2", till = "0.8.0")
),
Struct(
name = NamedArgumentGroup,
error_recovery = FieldsErrorRecovery(
Expand All @@ -3530,6 +3528,14 @@ codegen_language_macros::compile!(Language(
separator = Comma,
allow_empty = true
),
Separated(
name = CallOptions,
reference = NamedArgument,
separator = Comma,
enabled = From("0.6.2"),
// These cannot be empty as they're ambiguous with `try <EXPR> {} catch {}`
allow_empty = false
),
Struct(
name = NamedArgument,
fields = (
Expand Down
Loading
Loading