Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Precise error location in UNRECOGNIZED by attaching trivia to the expected nonterminal #1172

Merged
merged 12 commits into from
Dec 12, 2024
5 changes: 5 additions & 0 deletions .changeset/smart-cooks-suffer.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
"@nomicfoundation/slang": minor
---

Improved error recovery, where leading trivia are always parsed and included before an erroneous terminal.
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,7 @@ pub(crate) trait Lexer {
.is_some_and(|t| t.accepted_as(kind))
{
input.set_position(start);
return ParserResult::no_match(vec![kind]);
return ParserResult::no_match(None, vec![kind]);
}
let end = input.position();

Expand Down Expand Up @@ -129,7 +129,7 @@ pub(crate) trait Lexer {
.is_some_and(|t| t.accepted_as(kind))
{
input.set_position(restore);
return ParserResult::no_match(vec![kind]);
return ParserResult::no_match(None, vec![kind]);
}
let end = input.position();
children.push(Edge::anonymous(Node::terminal(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ pub struct ChoiceHelper {
impl ChoiceHelper {
pub fn new(input: &mut ParserContext<'_>) -> Self {
Self {
result: ParserResult::no_match(vec![]),
result: ParserResult::default(),
start_position: input.mark(),
recovered_errors: vec![],
last_progress: input.position(),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -57,16 +57,44 @@ where
ParserResult::PrattOperatorMatch(..) => unreachable!("PrattOperatorMatch is internal"),

ParserResult::NoMatch(no_match) => {
OmarTawfik marked this conversation as resolved.
Show resolved Hide resolved
// Parse leading trivia (see #1172 for details). One could argue that trivia should be parsed
// just once, and returned in the `NoMatch` structure. However, in rules like (This | That),
// trivia is already parsed twice, one for each branch. And there's a good reason: each branch might
// accept different trivia, so it's not clear what the combination of the two rules should return in a
// NoMatch. Therefore, we just parse it again. Note that trivia is anyway cached by the parser (#1119).
let mut trivia_nodes = if let ParserResult::Match(matched) =
Lexer::leading_trivia(parser, &mut stream)
OmarTawfik marked this conversation as resolved.
Show resolved Hide resolved
{
matched.nodes
} else {
vec![]
};

let mut start = TextIndex::ZERO;
for edge in &trivia_nodes {
if let Node::Terminal(terminal) = &edge.node {
if terminal.kind.is_valid() {
start.advance_str(terminal.text.as_str());
}
}
}
let input = &input[start.utf8..];
let kind = if input.is_empty() {
TerminalKind::MISSING
} else {
TerminalKind::UNRECOGNIZED
};

let node = Node::terminal(kind, input.to_string());
let tree = if no_match.kind.is_none() || start.utf8 == 0 {
node
} else {
trivia_nodes.push(Edge::anonymous(node));
Node::nonterminal(no_match.kind.unwrap(), trivia_nodes)
};
ParseOutput {
parse_tree: Node::terminal(kind, input.to_string()),
parse_tree: tree,
errors: vec![ParseError::new(
TextIndex::ZERO..input.into(),
start..start + input.into(),
no_match.expected_terminals,
)],
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ pub enum ParserResult {
impl Default for ParserResult {
fn default() -> Self {
Self::NoMatch(NoMatch {
kind: None,
expected_terminals: vec![],
})
}
Expand All @@ -34,13 +35,13 @@ impl ParserResult {
ParserResult::IncompleteMatch(IncompleteMatch::new(nodes, expected_terminals))
}

/// Whenever a parser didn't run because it's disabled due to versioning. Shorthand for `no_match(vec![])`.
/// Whenever a parser didn't run because it's disabled due to versioning. Shorthand for `no_match(None, vec![])`.
pub fn disabled() -> Self {
Self::no_match(vec![])
Self::no_match(None, vec![])
}

pub fn no_match(expected_terminals: Vec<TerminalKind>) -> Self {
ParserResult::NoMatch(NoMatch::new(expected_terminals))
pub fn no_match(kind: Option<NonterminalKind>, expected_terminals: Vec<TerminalKind>) -> Self {
ParserResult::NoMatch(NoMatch::new(kind, expected_terminals))
}

#[must_use]
Expand All @@ -61,7 +62,9 @@ impl ParserResult {
nodes: vec![Edge::anonymous(Node::nonterminal(new_kind, skipped.nodes))],
..skipped
}),
ParserResult::NoMatch(_) => self,
ParserResult::NoMatch(no_match) => {
ParserResult::no_match(Some(new_kind), no_match.expected_terminals)
}
ParserResult::PrattOperatorMatch(_) => {
unreachable!("PrattOperatorMatch cannot be converted to a nonterminal")
}
Expand Down Expand Up @@ -240,13 +243,18 @@ impl IncompleteMatch {

#[derive(PartialEq, Eq, Clone, Debug)]
pub struct NoMatch {
/// The nonterminal kind this match is coming from
pub kind: Option<NonterminalKind>,
/// Terminals that would have allowed for more progress. Collected for the purposes of error reporting.
pub expected_terminals: Vec<TerminalKind>,
}

impl NoMatch {
pub fn new(expected_terminals: Vec<TerminalKind>) -> Self {
Self { expected_terminals }
pub fn new(kind: Option<NonterminalKind>, expected_terminals: Vec<TerminalKind>) -> Self {
Self {
kind,
expected_terminals,
}
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,7 @@ impl ParserResult {
ParseResultKind::Incomplete => {
ParserResult::incomplete_match(nodes, expected_terminals)
}
ParseResultKind::NoMatch => ParserResult::no_match(expected_terminals),
ParseResultKind::NoMatch => ParserResult::no_match(None, expected_terminals),
}
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,10 @@ impl<const MIN_COUNT: usize> RepetitionHelper<MIN_COUNT> {

// Couldn't get a full match but we allow 0 items - return an empty match
// so the parse is considered valid but note the expected terminals
ParserResult::NoMatch(NoMatch { expected_terminals }) if MIN_COUNT == 0 => {
ParserResult::NoMatch(NoMatch {
kind: _,
expected_terminals,
}) if MIN_COUNT == 0 => {
return ParserResult::r#match(vec![], expected_terminals);
}
// Don't try repeating if we don't have a full match and we require at least one
Expand Down Expand Up @@ -60,7 +63,9 @@ impl<const MIN_COUNT: usize> RepetitionHelper<MIN_COUNT> {
ParserResult::IncompleteMatch(IncompleteMatch {
expected_terminals, ..
})
| ParserResult::NoMatch(NoMatch { expected_terminals }),
| ParserResult::NoMatch(NoMatch {
expected_terminals, ..
}),
) => {
input.rewind(save);
running.expected_terminals = expected_terminals;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,7 @@ impl SeparatedHelper {
}
ParserResult::NoMatch(no_match) => {
return if accum.is_empty() {
ParserResult::no_match(no_match.expected_terminals)
ParserResult::no_match(None, no_match.expected_terminals)
} else {
ParserResult::incomplete_match(accum, no_match.expected_terminals)
};
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -49,9 +49,9 @@ impl PrecedenceParserDefinitionCodegen for PrecedenceParserDefinitionRef {
[inner @ cst::Edge { node: cst::Node::Nonterminal(node), .. }] if node.kind == NonterminalKind::#op_nonterminal_name => {
ParserResult::r#match(vec![inner.clone()], r#match.expected_terminals.clone())
}
_ => ParserResult::no_match(vec![]),
_ => ParserResult::default(),
}
_ => ParserResult::no_match(vec![]),
_ => ParserResult::default(),
}
};

Expand Down
1 change: 1 addition & 0 deletions crates/metaslang/cst/generated/public_api.txt

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

14 changes: 9 additions & 5 deletions crates/metaslang/cst/src/text_index.rs
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,14 @@ impl TextIndex {
}
}
}

pub fn advance_str(&mut self, text: &str) {
let mut iter = text.chars().peekable();
while let Some(c) = iter.next() {
let n = iter.peek();
self.advance(c, n);
}
}
}

impl PartialOrd for TextIndex {
Expand All @@ -63,11 +71,7 @@ impl Display for TextIndex {
impl<T: AsRef<str>> From<T> for TextIndex {
fn from(s: T) -> Self {
let mut result = Self::ZERO;
let mut iter = s.as_ref().chars().peekable();
while let Some(c) = iter.next() {
let n = iter.peek();
result.advance(c, n);
}
result.advance_str(s.as_ref());
result
}
}
Expand Down
Loading