Skip to content

Commit

Permalink
Add Node::unparse and tweak the cursor creation API (#666)
Browse files Browse the repository at this point in the history
Closes #583
Ref #628 for the modified cursor/offset API

Changes are best reviewed commit-by-commit.
  • Loading branch information
Xanewok authored Nov 22, 2023
1 parent 54af80d commit 0434b68
Show file tree
Hide file tree
Showing 28 changed files with 249 additions and 146 deletions.
5 changes: 5 additions & 0 deletions .changeset/strange-hats-itch.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
"@nomicfoundation/slang": minor
---

Add `Node::unparse()` that allows reconstructing the source code from the CST node
1 change: 1 addition & 0 deletions .cspell.json
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
"structs",
"tera",
"ufixed",
"unparse",
"usize"
]
}
86 changes: 53 additions & 33 deletions crates/codegen/parser/runtime/src/cst.rs
Original file line number Diff line number Diff line change
Expand Up @@ -54,63 +54,83 @@ impl Node {
}
}

pub fn create_cursor(&self, text_offset: TextIndex) -> Cursor {
/// Creates a [`Cursor`] that starts at the current node as the root and a given initial `text_offset`.
pub fn cursor_with_offset(&self, text_offset: TextIndex) -> Cursor {
Cursor::new(self.clone(), text_offset)
}

/// Reconstructs the original source code from the parse tree.
pub fn unparse(self) -> String {
match self {
Self::Rule(rule) => rule.unparse(),
Self::Token(token) => token.text.clone(),
}
}

pub fn as_rule(&self) -> Option<&Rc<RuleNode>> {
match self {
Self::Rule(node) => Some(node),
_ => None,
}
}

pub fn into_rule(self) -> Option<Rc<RuleNode>> {
match self {
Self::Rule(node) => Some(node),
_ => None,
}
}

pub fn as_token(&self) -> Option<&Rc<TokenNode>> {
match self {
Self::Token(node) => Some(node),
_ => None,
}
}

pub fn as_token_with_kind(&self, kinds: &[TokenKind]) -> Option<&Rc<TokenNode>> {
if let Node::Token(token_node) = self {
if kinds.contains(&token_node.kind) {
return Some(token_node);
}
pub fn into_token(self) -> Option<Rc<TokenNode>> {
match self {
Self::Token(node) => Some(node),
_ => None,
}
return None;
}

pub fn as_token_matching<F: Fn(&Rc<TokenNode>) -> bool>(
&self,
predicate: F,
) -> Option<&Rc<TokenNode>> {
if let Node::Token(token_node) = self {
if predicate(&token_node) {
return Some(token_node);
}
}
return None;
pub fn as_token_with_kind(&self, kinds: &[TokenKind]) -> Option<&Rc<TokenNode>> {
self.as_token().filter(|token| kinds.contains(&token.kind))
}

pub fn as_rule_with_kind(&self, kinds: &[RuleKind]) -> Option<&Rc<RuleNode>> {
if let Node::Rule(rule_node) = self {
if kinds.contains(&rule_node.kind) {
return Some(rule_node);
}
}
return None;
self.as_rule().filter(|rule| kinds.contains(&rule.kind))
}
}

pub fn as_rule_matching<F: Fn(&Rc<RuleNode>) -> bool>(
&self,
predicate: F,
) -> Option<&Rc<RuleNode>> {
if let Node::Rule(rule_node) = self {
if predicate(&rule_node) {
return Some(rule_node);
}
}
return None;
impl From<Rc<RuleNode>> for Node {
fn from(node: Rc<RuleNode>) -> Self {
Self::Rule(node)
}
}

impl From<Rc<TokenNode>> for Node {
fn from(node: Rc<TokenNode>) -> Self {
Self::Token(node)
}
}

impl RuleNode {
/// Creates a [`Cursor`] that starts at the current node as the root and a given initial `text_offset`.
pub fn cursor_with_offset(self: Rc<Self>, text_offset: TextIndex) -> Cursor {
Cursor::new(Node::Rule(self), text_offset)
}

/// Reconstructs the original source code from the parse tree.
pub fn unparse(self: Rc<Self>) -> String {
let acc = String::with_capacity(self.text_len.utf8);

self.cursor_with_offset(TextIndex::ZERO)
.filter_map(Node::into_token)
.fold(acc, |mut acc, token| {
acc.push_str(&token.text);
acc
})
}
}
6 changes: 3 additions & 3 deletions crates/codegen/parser/runtime/src/cursor.rs
Original file line number Diff line number Diff line change
Expand Up @@ -370,7 +370,7 @@ impl Cursor {
None
}

/// In contract to `Iterator::find_*`, this does not consume the first item when found.
/// In contrast to `Iterator::find_*`, this does not consume the first item when found.
fn find_noconsume<F: Fn(&Node) -> Option<R>, R>(&mut self, predicate: F) -> Option<R> {
while !self.is_completed {
match predicate(&self.current.node) {
Expand Down Expand Up @@ -398,7 +398,7 @@ impl Cursor {
&mut self,
predicate: F,
) -> Option<Rc<TokenNode>> {
self.find_noconsume(|node| node.as_token_matching(&predicate).cloned())
self.find_noconsume(|node| node.as_token().filter(|node| predicate(node)).cloned())
}

/// Finds the first rule node with either of the given kinds.
Expand All @@ -415,6 +415,6 @@ impl Cursor {
&mut self,
predicate: F,
) -> Option<Rc<RuleNode>> {
self.find_noconsume(|node| node.as_rule_matching(&predicate).cloned())
self.find_noconsume(|node| node.as_rule().filter(|node| predicate(node)).cloned())
}
}
4 changes: 2 additions & 2 deletions crates/codegen/parser/runtime/src/napi/napi_cst.rs
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ impl RuleNode {
#[napi(ts_return_type = "cursor.Cursor")]
pub fn create_cursor(&self, text_offset: TextIndex) -> Cursor {
RustNode::Rule(self.0.clone())
.create_cursor((&text_offset).into())
.cursor_with_offset((&text_offset).into())
.into()
}
}
Expand Down Expand Up @@ -88,7 +88,7 @@ impl TokenNode {
#[napi(ts_return_type = "cursor.Cursor")]
pub fn create_cursor(&self, text_offset: TextIndex) -> Cursor {
RustNode::Token(self.0.clone())
.create_cursor((&text_offset).into())
.cursor_with_offset((&text_offset).into())
.into()
}
}
Expand Down
4 changes: 2 additions & 2 deletions crates/codegen/parser/runtime/src/parse_output.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
use crate::{cst, cursor::Cursor, parse_error::ParseError};
use crate::{cst, cursor::Cursor, parse_error::ParseError, text_index::TextIndex};

#[derive(Debug, PartialEq)]
pub struct ParseOutput {
Expand All @@ -21,6 +21,6 @@ impl ParseOutput {

/// Creates a cursor that starts at the root of the parse tree.
pub fn create_tree_cursor(&self) -> Cursor {
return self.parse_tree.create_cursor(Default::default());
return self.parse_tree.cursor_with_offset(TextIndex::ZERO);
}
}
2 changes: 1 addition & 1 deletion crates/codegen/parser/runtime/src/support/choice_helper.rs
Original file line number Diff line number Diff line change
Expand Up @@ -146,7 +146,7 @@ pub fn total_not_skipped_span(result: &ParserResult) -> usize {

nodes
.iter()
.flat_map(|node| cst::Node::create_cursor(node, Default::default()))
.flat_map(|node| cst::Node::cursor_with_offset(node, TextIndex::ZERO))
.filter_map(|node| match node {
cst::Node::Token(token) if token.kind != TokenKind::SKIPPED => Some(token.text.len()),
_ => None,
Expand Down
7 changes: 5 additions & 2 deletions crates/codegen/parser/runtime/src/support/parser_function.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,10 @@
use std::rc::Rc;

use super::{
super::{cst, kinds::TokenKind, parse_error::ParseError, parse_output::ParseOutput},
super::{
cst, kinds::TokenKind, parse_error::ParseError, parse_output::ParseOutput,
text_index::TextIndex,
},
context::ParserContext,
parser_result::*,
};
Expand Down Expand Up @@ -95,7 +98,7 @@ where
debug_assert_eq!(
errors.len() > 0,
parse_tree
.create_cursor(Default::default())
.cursor_with_offset(TextIndex::ZERO)
.any(|x| x.as_token_with_kind(&[TokenKind::SKIPPED]).is_some())
);

Expand Down
4 changes: 2 additions & 2 deletions crates/codegen/parser/runtime/src/support/parser_result.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
use super::super::{cst, kinds::*};
use super::super::{cst, kinds::*, text_index::TextIndex};

#[derive(PartialEq, Eq, Clone, Debug)]
pub enum ParserResult {
Expand Down Expand Up @@ -93,7 +93,7 @@ impl Match {
pub fn is_full_recursive(&self) -> bool {
self.nodes
.iter()
.flat_map(|node| cst::Node::create_cursor(node, Default::default()))
.flat_map(|node| cst::Node::cursor_with_offset(node, TextIndex::ZERO))
.all(|node| node.as_token_with_kind(&[TokenKind::SKIPPED]).is_none())
}
}
Expand Down
9 changes: 9 additions & 0 deletions crates/codegen/parser/runtime/src/text_index.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,15 @@ pub struct TextIndex {
pub char: usize,
}

impl TextIndex {
/// Shorthand for `TextIndex { utf8: 0, utf16: 0, char: 0 }`.
pub const ZERO: TextIndex = TextIndex {
utf8: 0,
utf16: 0,
char: 0,
};
}

impl PartialOrd for TextIndex {
fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
self.utf8.partial_cmp(&other.utf8)
Expand Down
86 changes: 53 additions & 33 deletions crates/solidity/outputs/cargo/crate/src/generated/cst.rs

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 3 additions & 3 deletions crates/solidity/outputs/cargo/crate/src/generated/cursor.rs

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading

0 comments on commit 0434b68

Please sign in to comment.